//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Wraps a captured statement (or none, for inlined regions) together with the
/// code-generation callback and the directive kind that produced the region.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default
  /// (overridden by task-outlined regions).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region contains a 'cancel' construct (set by the caller that
  /// created the region).
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switch-based resume machinery required by
  /// untied tasks: each suspension point becomes a case of a single
  /// llvm::SwitchInst keyed on the task part id.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the part id and dispatch on it.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspension/resume point: store the next part id, run the
    /// untied-task epilogue, return, and register the resume block as a new
    /// switch case. Must only be called after Enter() created UntiedSwitch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index doubles as the stored resume part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Delegates every query to the enclosing (outer) region info,
/// if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Cached downcast of OldCSI; null if the outer info is not an OpenMP
  /// region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and restores the previous
/// CapturedStmtInfo (plus, optionally, lambda/block capture state) on
/// destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, also temporarily clear lambda/block capture
  /// state so the inlined region does not inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the current lambda/block capture state and clear it in CGF.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file, the
///                            function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
/// EH-cleanup wrapper that runs the Exit() hook of a PrePostActionTy when the
/// enclosing cleanup scope unwinds (normally or via exception).
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if codegen already terminated the block.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored codegen callback, running the pre/post action (if any)
/// around it; Exit() is guaranteed via the cleanup stack even on EH paths.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match: call whose callee is an opaque value wrapping a DeclRefExpr
  // to an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of a reduction private copy.
/// \param DRD User-defined reduction declaration.
/// \param InitOp The initializer call expression from the reduction clause.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
/// If the UDR has an initializer, it is invoked with LHS/RHS remapped to the
/// private/original addresses; otherwise the private copy is zero-initialized
/// from a private constant global.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's LHS to the private copy and RHS to the original,
    // then emit the initializer call with the initializer function as callee.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a zero-initialized constant and copy it
    // into the private storage according to the type's evaluation kind.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      // Aggregates are copied directly; no rvalue path needed.
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 if (DRD) 691 SrcAddr = 692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 693 694 llvm::Value *SrcBegin = nullptr; 695 if (DRD) 696 SrcBegin = SrcAddr.getPointer(); 697 llvm::Value *DestBegin = DestAddr.getPointer(); 698 // Cast from pointer to array type to pointer to single element. 699 llvm::Value *DestEnd = 700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, SrcAddr.getElementType(), 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, DestAddr.getElementType(), 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 746 "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 753 "omp.arraycpy.dest.element"); 754 // Check whether we've reached the end. 
755 llvm::Value *Done = 756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 759 760 // Done. 761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 762 } 763 764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 765 return CGF.EmitOMPSharedLValue(E); 766 } 767 768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 769 const Expr *E) { 770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 772 return LValue(); 773 } 774 775 void ReductionCodeGen::emitAggregateInitialization( 776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 777 const OMPDeclareReductionDecl *DRD) { 778 // Emit VarDecl with copy init for arrays. 779 // Get the address of the original variable captured in current 780 // captured region. 781 const auto *PrivateVD = 782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 783 bool EmitDeclareReductionInit = 784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 786 EmitDeclareReductionInit, 787 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 788 : PrivateVD->getInit(), 789 DRD, SharedAddr); 790 } 791 792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 793 ArrayRef<const Expr *> Origs, 794 ArrayRef<const Expr *> Privates, 795 ArrayRef<const Expr *> ReductionOps) { 796 ClausesData.reserve(Shareds.size()); 797 SharedAddresses.reserve(Shareds.size()); 798 Sizes.reserve(Shareds.size()); 799 BaseDecls.reserve(Shareds.size()); 800 const auto *IOrig = Origs.begin(); 801 const auto *IPriv = Privates.begin(); 802 const auto *IRed = ReductionOps.begin(); 803 for (const Expr *Ref : Shareds) { 804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 805 std::advance(IOrig, 1); 806 std::advance(IPriv, 1); 807 std::advance(IRed, 1); 808 } 809 } 810 811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 813 "Number of generated lvalues must be exactly N."); 814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 816 SharedAddresses.emplace_back(First, Second); 817 if (ClausesData[N].Shared == ClausesData[N].Ref) { 818 OrigAddresses.emplace_back(First, Second); 819 } else { 820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 822 OrigAddresses.emplace_back(First, Second); 823 } 824 } 825 826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 827 QualType PrivateType = getPrivateType(N); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value *SizeInChars; 837 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); 838 auto *ElemSizeOf = 
llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1, then scale by the element
    // size to get the byte size. NUW arithmetic: sizes cannot wrap.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA (non-section): the byte size comes from the type; derive the element
    // count by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA's size expression so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

// Overload used when the element count for item N was already computed
// elsewhere: only (re-)emit the variably-modified private type with the given
// Size bound to its VLA size expression. No-op for constant-sized items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

// Emit the initialization of the private copy of reduction item N, choosing
// between aggregate init, a declare-reduction initializer, and the private
// variable's own (non-trivial) initializer. DefaultInit is invoked first and
// may itself fully handle the initialization (it returns true in that case).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed item: element-wise aggregate initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer when DefaultInit did
    // not already handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

// Return true if the private copy of item N has a non-trivial destructor and
// therefore needs a cleanup emitted.
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

// Push a destructor cleanup for the private copy of item N, if its type
// requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

// Chase pointer/reference indirections from BaseLV (of type BaseTy) until the
// pointee type matches ElTy, loading through each level, then return an lvalue
// for the start of the data cast to ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

// Rebuild the pointer/reference indirection chain of BaseTy around Addr: for
// each indirection level a temporary is created and chained so that loading
// through the returned address eventually yields Addr (cast to ElTy's
// representation). If BaseTy has no extra indirection, Addr is simply cast to
// the original base address's type.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store Addr into the innermost temporary and return the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

// For an array section or array subscript expression, strip subscripts and
// sections down to the underlying DeclRefExpr; returns the base VarDecl (and
// sets DE) or nullptr if Ref is neither form. Note: DE is only written on the
// non-null path.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

// Adjust the private address of reduction item N so that it corresponds to
// the same offset within the private copy as the shared lvalue has within its
// base variable (needed for array sections/subscripts that do not start at
// the base). Also records the base VarDecl in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the base from the section start; applying the
    // same offset to the private pointer lines the two copies up.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

// True if item N is initialized via a user-defined (declare reduction)
// initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

// Thread-id variable is passed by pointer into outlined regions; load through
// it to get the int32 lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

// Emit the region body inside a terminate scope (exceptions may not leave an
// OpenMP structured block).
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

// For tasks the thread id is a plain local (kmp_int32), not a pointer, so no
// load-through-pointer is needed here.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

// Append a public, non-mutable field of the given type to DC (used when
// building synthetic record types such as task/privates records).
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 int32 in the kmp runtime.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase unreferenced pure declarations; anything defined or still
    // used must stay.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

// Join name parts with FirstSeparator before the first part and Separator
// between the rest (e.g. ".omp.foo" style runtime names).
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

// Emit the combiner (or initializer, when IsCombiner is false) function for a
// user-defined reduction: an internal-linkage
// "void fn(Ty *restrict out, Ty *restrict in)" whose body maps the omp_in /
// omp_out (resp. omp_priv / omp_orig) variables onto the parameters and then
// evaluates CombinerInitializer.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
"omp_combiner" : "omp_initializer", ""}); 1116 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1117 Name, &CGM.getModule()); 1118 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1119 if (CGM.getLangOpts().Optimize) { 1120 Fn->removeFnAttr(llvm::Attribute::NoInline); 1121 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1122 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1123 } 1124 CodeGenFunction CGF(CGM); 1125 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1126 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1127 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1128 Out->getLocation()); 1129 CodeGenFunction::OMPPrivateScope Scope(CGF); 1130 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1131 Scope.addPrivate( 1132 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1133 .getAddress(CGF)); 1134 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1135 Scope.addPrivate( 1136 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1137 .getAddress(CGF)); 1138 (void)Scope.Privatize(); 1139 if (!IsCombiner && Out->hasInit() && 1140 !CGF.isTrivialInitializer(Out->getInit())) { 1141 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1142 Out->getType().getQualifiers(), 1143 /*IsInitializer=*/true); 1144 } 1145 if (CombinerInitializer) 1146 CGF.EmitIgnoredExpr(CombinerInitializer); 1147 Scope.ForceCleanup(); 1148 CGF.FinishFunction(); 1149 return Fn; 1150 } 1151 1152 void CGOpenMPRuntime::emitUserDefinedReduction( 1153 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1154 if (UDRMap.count(D) > 0) 1155 return; 1156 llvm::Function *Combiner = emitCombinerOrInitializer( 1157 CGM, D->getType(), D->getCombiner(), 1158 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1159 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1160 /*IsCombiner=*/true); 1161 
llvm::Function *Initializer = nullptr; 1162 if (const Expr *Init = D->getInitializer()) { 1163 Initializer = emitCombinerOrInitializer( 1164 CGM, D->getType(), 1165 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1166 : nullptr, 1167 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1168 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1169 /*IsCombiner=*/false); 1170 } 1171 UDRMap.try_emplace(D, Combiner, Initializer); 1172 if (CGF) { 1173 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1174 Decls.second.push_back(D); 1175 } 1176 } 1177 1178 std::pair<llvm::Function *, llvm::Function *> 1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1180 auto I = UDRMap.find(D); 1181 if (I != UDRMap.end()) 1182 return I->second; 1183 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1184 return UDRMap.lookup(D); 1185 } 1186 1187 namespace { 1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1189 // Builder if one is present. 1190 struct PushAndPopStackRAII { 1191 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1192 bool HasCancel, llvm::omp::Directive Kind) 1193 : OMPBuilder(OMPBuilder) { 1194 if (!OMPBuilder) 1195 return; 1196 1197 // The following callback is the crucial part of clangs cleanup process. 1198 // 1199 // NOTE: 1200 // Once the OpenMPIRBuilder is used to create parallel regions (and 1201 // similar), the cancellation destination (Dest below) is determined via 1202 // IP. That means if we have variables to finalize we split the block at IP, 1203 // use the new block (=BB) as destination to build a JumpDest (via 1204 // getJumpDestInCurrentScope(BB)) which then is fed to 1205 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1206 // to push & pop an FinalizationInfo object. 
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

// Outline the captured statement of a 'parallel'/'teams' (and combined)
// directive into a function named via OutlinedHelperName. HasCancel is
// derived from the concrete directive kind so the outlined region sets up
// cancellation handling when needed.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive (in any of its parallel-containing
  // combined forms) carries a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Task-family directives that can carry a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report how many were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

// Fill Fields with Data constants placed at each field's LLVM field index,
// inserting null padding values for the layout slots in between.
// NOTE(review): assumes Data has one constant per field of RD — confirm at
// call sites.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  const auto *DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

// Create a global variable of record type Ty initialized field-by-field from
// Data; extra Args are forwarded to ConstantInitBuilder's global creation.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

// Same struct-building logic, but nests the constant struct into an existing
// parent builder instead of creating a standalone global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

// Establish the per-function insertion point used for service calls such as
// the thread-id lookup. The dummy bitcast serves purely as a stable marker
// instruction to insert before.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef =
llvm::UndefValue::get(CGF.Int32Ty); 1383 if (AtCurrentPoint) { 1384 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1385 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1386 } else { 1387 Elem.second.ServiceInsertPt = 1388 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1389 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1390 } 1391 } 1392 1393 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1394 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1395 if (Elem.second.ServiceInsertPt) { 1396 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1397 Elem.second.ServiceInsertPt = nullptr; 1398 Ptr->eraseFromParent(); 1399 } 1400 } 1401 1402 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1403 SourceLocation Loc, 1404 SmallString<128> &Buffer) { 1405 llvm::raw_svector_ostream OS(Buffer); 1406 // Build debug location 1407 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1408 OS << ";" << PLoc.getFilename() << ";"; 1409 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1410 OS << FD->getQualifiedNameAsString(); 1411 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1412 return OS.str(); 1413 } 1414 1415 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1416 SourceLocation Loc, 1417 unsigned Flags) { 1418 uint32_t SrcLocStrSize; 1419 llvm::Constant *SrcLocStr; 1420 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1421 Loc.isInvalid()) { 1422 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 1423 } else { 1424 std::string FunctionName; 1425 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1426 FunctionName = FD->getQualifiedNameAsString(); 1427 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1428 const char *FileName = PLoc.getFilename(); 1429 unsigned Line = PLoc.getLine(); 1430 unsigned Column = 
PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

// Return the global thread id (kmp_int32) for the current function, preferring
// in order: the OpenMPIRBuilder's mechanism, a previously cached value, the
// thread-id argument of an outlined region, and finally an emitted call to
// __kmpc_global_thread_num at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load the argument when doing so cannot execute after a throw has
      // unwound it: either EH is off, or we are still in (or the pointer lives
      // in) the entry/current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

// Drop all per-function cached state (thread id, UDR/UDM associations,
// lastprivate-conditional and untied-task bookkeeping) when codegen for a
// function finishes.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

// ident_t* type, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

// Lazily build and return the pointer-to-kmpc_micro outlined-function type.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    // Variadic: outlined regions append their captured arguments.
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Get (creating if needed) the __kmpc_{for,distribute}_static_init_{4,4u,8,8u}
// runtime entry matching the induction-variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Get the __kmpc_dispatch_init_{4,4u,8,8u} entry for dynamic scheduling.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Get the __kmpc_dispatch_fini_{4,4u,8,8u} entry (dispatch loop teardown).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Get the __kmpc_dispatch_next_{4,4u,8,8u} entry (fetch next dynamic chunk;
// returns int32 "more work" flag).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1626 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1627 llvm::Type *TypeParams[] = { 1628 getIdentTyPointerTy(), // loc 1629 CGM.Int32Ty, // tid 1630 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1631 PtrTy, // p_lower 1632 PtrTy, // p_upper 1633 PtrTy // p_stride 1634 }; 1635 auto *FnTy = 1636 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1637 return CGM.CreateRuntimeFunction(FnTy, Name); 1638 } 1639 1640 /// Obtain information that uniquely identifies a target entry. This 1641 /// consists of the file and device IDs as well as line number associated with 1642 /// the relevant entry source location. 1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1644 unsigned &DeviceID, unsigned &FileID, 1645 unsigned &LineNum) { 1646 SourceManager &SM = C.getSourceManager(); 1647 1648 // The loc should be always valid and have a file ID (the user cannot use 1649 // #pragma directives in macros) 1650 1651 assert(Loc.isValid() && "Source location is expected to be always valid."); 1652 1653 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1654 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1655 1656 llvm::sys::fs::UniqueID ID; 1657 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1658 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1659 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1662 << PLoc.getFilename() << EC.message(); 1663 } 1664 1665 DeviceID = ID.getDevice(); 1666 FileID = ID.getFile(); 1667 LineNum = PLoc.getLine(); 1668 } 1669 1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1671 if (CGM.getLangOpts().OpenMPSimd) 1672 return Address::invalid(); 1673 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1674 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1675 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1676 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1677 HasRequiresUnifiedSharedMemory))) { 1678 SmallString<64> PtrName; 1679 { 1680 llvm::raw_svector_ostream OS(PtrName); 1681 OS << CGM.getMangledName(GlobalDecl(VD)); 1682 if (!VD->isExternallyVisible()) { 1683 unsigned DeviceID, FileID, Line; 1684 getTargetEntryUniqueInfo(CGM.getContext(), 1685 VD->getCanonicalDecl()->getBeginLoc(), 1686 DeviceID, FileID, Line); 1687 OS << llvm::format("_%x", FileID); 1688 } 1689 OS << "_decl_tgt_ref_ptr"; 1690 } 1691 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1692 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1693 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); 1694 if (!Ptr) { 1695 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); 1696 1697 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1698 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1699 1700 if (!CGM.getLangOpts().OpenMPIsDevice) 1701 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1702 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1703 } 1704 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1705 } 1706 return Address::invalid(); 1707 } 1708 1709 llvm::Constant * 1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1711 assert(!CGM.getLangOpts().OpenMPUseTLS || 1712 !CGM.getContext().getTargetInfo().isTLSSupported()); 1713 // Lookup the entry, lazily creating it if necessary. 
1714 std::string Suffix = getName({"cache", ""}); 1715 return getOrCreateInternalVariable( 1716 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1717 } 1718 1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1720 const VarDecl *VD, 1721 Address VDAddr, 1722 SourceLocation Loc) { 1723 if (CGM.getLangOpts().OpenMPUseTLS && 1724 CGM.getContext().getTargetInfo().isTLSSupported()) 1725 return VDAddr; 1726 1727 llvm::Type *VarTy = VDAddr.getElementType(); 1728 llvm::Value *Args[] = { 1729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1730 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1731 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1732 getOrCreateThreadPrivateCache(VD)}; 1733 return Address( 1734 CGF.EmitRuntimeCall( 1735 OMPBuilder.getOrCreateRuntimeFunction( 1736 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1737 Args), 1738 CGF.Int8Ty, VDAddr.getAlignment()); 1739 } 1740 1741 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1742 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1743 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1744 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1745 // library. 1746 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1747 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1748 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1749 OMPLoc); 1750 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1751 // to register constructor/destructor for variable. 
1752 llvm::Value *Args[] = { 1753 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1754 Ctor, CopyCtor, Dtor}; 1755 CGF.EmitRuntimeCall( 1756 OMPBuilder.getOrCreateRuntimeFunction( 1757 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1758 Args); 1759 } 1760 1761 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1762 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1763 bool PerformInit, CodeGenFunction *CGF) { 1764 if (CGM.getLangOpts().OpenMPUseTLS && 1765 CGM.getContext().getTargetInfo().isTLSSupported()) 1766 return nullptr; 1767 1768 VD = VD->getDefinition(CGM.getContext()); 1769 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1770 QualType ASTTy = VD->getType(); 1771 1772 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1773 const Expr *Init = VD->getAnyInitializer(); 1774 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1775 // Generate function that re-emits the declaration's initializer into the 1776 // threadprivate copy of the variable VD 1777 CodeGenFunction CtorCGF(CGM); 1778 FunctionArgList Args; 1779 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1780 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1781 ImplicitParamDecl::Other); 1782 Args.push_back(&Dst); 1783 1784 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1785 CGM.getContext().VoidPtrTy, Args); 1786 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1787 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1788 llvm::Function *Fn = 1789 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1790 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1791 Args, Loc, Loc); 1792 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1793 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1794 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1795 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); 1796 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1797 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1798 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1799 /*IsInitializer=*/true); 1800 ArgVal = CtorCGF.EmitLoadOfScalar( 1801 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1802 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1803 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1804 CtorCGF.FinishFunction(); 1805 Ctor = Fn; 1806 } 1807 if (VD->getType().isDestructedType() != QualType::DK_none) { 1808 // Generate function that emits destructor call for the threadprivate copy 1809 // of the variable VD 1810 CodeGenFunction DtorCGF(CGM); 1811 FunctionArgList Args; 1812 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1813 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1814 ImplicitParamDecl::Other); 1815 Args.push_back(&Dst); 1816 1817 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1818 CGM.getContext().VoidTy, Args); 1819 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1820 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1821 llvm::Function *Fn = 1822 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1823 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1824 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1825 Loc, Loc); 1826 // Create a scope with an artificial location for the body of this function. 1827 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1828 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1829 DtorCGF.GetAddrOfLocalVar(&Dst), 1830 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1831 DtorCGF.emitDestroy( 1832 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, 1833 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1834 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1835 DtorCGF.FinishFunction(); 1836 Dtor = Fn; 1837 } 1838 // Do not emit init function if it is not required. 
1839 if (!Ctor && !Dtor) 1840 return nullptr; 1841 1842 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1843 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1844 /*isVarArg=*/false) 1845 ->getPointerTo(); 1846 // Copying constructor for the threadprivate variable. 1847 // Must be NULL - reserved by runtime, but currently it requires that this 1848 // parameter is always NULL. Otherwise it fires assertion. 1849 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1850 if (Ctor == nullptr) { 1851 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1852 /*isVarArg=*/false) 1853 ->getPointerTo(); 1854 Ctor = llvm::Constant::getNullValue(CtorTy); 1855 } 1856 if (Dtor == nullptr) { 1857 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1858 /*isVarArg=*/false) 1859 ->getPointerTo(); 1860 Dtor = llvm::Constant::getNullValue(DtorTy); 1861 } 1862 if (!CGF) { 1863 auto *InitFunctionTy = 1864 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1865 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1866 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1867 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1868 CodeGenFunction InitCGF(CGM); 1869 FunctionArgList ArgList; 1870 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1871 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1872 Loc, Loc); 1873 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1874 InitCGF.FinishFunction(); 1875 return InitFunction; 1876 } 1877 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 } 1879 return nullptr; 1880 } 1881 1882 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1883 llvm::GlobalVariable *Addr, 1884 bool PerformInit) { 1885 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1886 !CGM.getLangOpts().OpenMPIsDevice) 1887 return false; 1888 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1889 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1890 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1891 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1892 HasRequiresUnifiedSharedMemory)) 1893 return CGM.getLangOpts().OpenMPIsDevice; 1894 VD = VD->getDefinition(CGM.getContext()); 1895 assert(VD && "Unknown VarDecl"); 1896 1897 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1898 return CGM.getLangOpts().OpenMPIsDevice; 1899 1900 QualType ASTTy = VD->getType(); 1901 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1902 1903 // Produce the unique prefix to identify the new target regions. We use 1904 // the source location of the variable declaration which we know to not 1905 // conflict with any target region. 1906 unsigned DeviceID; 1907 unsigned FileID; 1908 unsigned Line; 1909 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1910 SmallString<128> Buffer, Out; 1911 { 1912 llvm::raw_svector_ostream OS(Buffer); 1913 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1914 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1915 } 1916 1917 const Expr *Init = VD->getAnyInitializer(); 1918 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1919 llvm::Constant *Ctor; 1920 llvm::Constant *ID; 1921 if (CGM.getLangOpts().OpenMPIsDevice) { 1922 // Generate function that re-emits the declaration's initializer into 1923 // the threadprivate copy of the variable VD 1924 CodeGenFunction CtorCGF(CGM); 1925 1926 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1927 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1928 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1929 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1930 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1931 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1932 FunctionArgList(), Loc, Loc); 1933 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1934 llvm::Constant *AddrInAS0 = Addr; 1935 if (Addr->getAddressSpace() != 0) 1936 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1937 Addr, llvm::PointerType::getWithSamePointeeType( 1938 cast<llvm::PointerType>(Addr->getType()), 0)); 1939 CtorCGF.EmitAnyExprToMem(Init, 1940 Address(AddrInAS0, Addr->getValueType(), 1941 CGM.getContext().getDeclAlign(VD)), 1942 Init->getType().getQualifiers(), 1943 /*IsInitializer=*/true); 1944 CtorCGF.FinishFunction(); 1945 Ctor = Fn; 1946 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1947 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1948 } else { 1949 Ctor = new llvm::GlobalVariable( 1950 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1951 llvm::GlobalValue::PrivateLinkage, 1952 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1953 ID = Ctor; 1954 } 1955 1956 // Register the information for the entry associated with the constructor. 1957 Out.clear(); 1958 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1959 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1960 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1961 } 1962 if (VD->getType().isDestructedType() != QualType::DK_none) { 1963 llvm::Constant *Dtor; 1964 llvm::Constant *ID; 1965 if (CGM.getLangOpts().OpenMPIsDevice) { 1966 // Generate function that emits destructor call for the threadprivate 1967 // copy of the variable VD 1968 CodeGenFunction DtorCGF(CGM); 1969 1970 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1971 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1972 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1973 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1974 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1975 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1976 FunctionArgList(), Loc, Loc); 1977 // Create a scope with an artificial location for the body of this 1978 
// function. 1979 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1980 llvm::Constant *AddrInAS0 = Addr; 1981 if (Addr->getAddressSpace() != 0) 1982 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1983 Addr, llvm::PointerType::getWithSamePointeeType( 1984 cast<llvm::PointerType>(Addr->getType()), 0)); 1985 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), 1986 CGM.getContext().getDeclAlign(VD)), 1987 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1988 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1989 DtorCGF.FinishFunction(); 1990 Dtor = Fn; 1991 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1992 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1993 } else { 1994 Dtor = new llvm::GlobalVariable( 1995 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1996 llvm::GlobalValue::PrivateLinkage, 1997 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1998 ID = Dtor; 1999 } 2000 // Register the information for the entry associated with the destructor. 
2001 Out.clear(); 2002 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2003 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2004 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2005 } 2006 return CGM.getLangOpts().OpenMPIsDevice; 2007 } 2008 2009 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2010 QualType VarType, 2011 StringRef Name) { 2012 std::string Suffix = getName({"artificial", ""}); 2013 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2014 llvm::GlobalVariable *GAddr = 2015 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2016 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2017 CGM.getTarget().isTLSSupported()) { 2018 GAddr->setThreadLocal(/*Val=*/true); 2019 return Address(GAddr, GAddr->getValueType(), 2020 CGM.getContext().getTypeAlignInChars(VarType)); 2021 } 2022 std::string CacheSuffix = getName({"cache", ""}); 2023 llvm::Value *Args[] = { 2024 emitUpdateLocation(CGF, SourceLocation()), 2025 getThreadID(CGF, SourceLocation()), 2026 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2027 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2028 /*isSigned=*/false), 2029 getOrCreateInternalVariable( 2030 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2031 return Address( 2032 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2033 CGF.EmitRuntimeCall( 2034 OMPBuilder.getOrCreateRuntimeFunction( 2035 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2036 Args), 2037 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2038 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2039 } 2040 2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2042 const RegionCodeGenTy &ThenGen, 2043 const RegionCodeGenTy &ElseGen) { 2044 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2045 2046 // If the condition constant folds and can be elided, try 
to avoid emitting 2047 // the condition and the dead arm of the if/else. 2048 bool CondConstant; 2049 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2050 if (CondConstant) 2051 ThenGen(CGF); 2052 else 2053 ElseGen(CGF); 2054 return; 2055 } 2056 2057 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2058 // emit the conditional branch. 2059 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2060 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2061 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2062 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2063 2064 // Emit the 'then' code. 2065 CGF.EmitBlock(ThenBlock); 2066 ThenGen(CGF); 2067 CGF.EmitBranch(ContBlock); 2068 // Emit the 'else' code if present. 2069 // There is no need to emit line number for unconditional branch. 2070 (void)ApplyDebugLocation::CreateEmpty(CGF); 2071 CGF.EmitBlock(ElseBlock); 2072 ElseGen(CGF); 2073 // There is no need to emit line number for unconditional branch. 2074 (void)ApplyDebugLocation::CreateEmpty(CGF); 2075 CGF.EmitBranch(ContBlock); 2076 // Emit the continuation block for code after the if. 
2077 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2078 } 2079 2080 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2081 llvm::Function *OutlinedFn, 2082 ArrayRef<llvm::Value *> CapturedVars, 2083 const Expr *IfCond, 2084 llvm::Value *NumThreads) { 2085 if (!CGF.HaveInsertPoint()) 2086 return; 2087 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2088 auto &M = CGM.getModule(); 2089 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2090 this](CodeGenFunction &CGF, PrePostActionTy &) { 2091 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2092 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2093 llvm::Value *Args[] = { 2094 RTLoc, 2095 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2096 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2097 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2098 RealArgs.append(std::begin(Args), std::end(Args)); 2099 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2100 2101 llvm::FunctionCallee RTLFn = 2102 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2103 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2104 }; 2105 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2106 this](CodeGenFunction &CGF, PrePostActionTy &) { 2107 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2108 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2109 // Build calls: 2110 // __kmpc_serialized_parallel(&Loc, GTid); 2111 llvm::Value *Args[] = {RTLoc, ThreadID}; 2112 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2113 M, OMPRTL___kmpc_serialized_parallel), 2114 Args); 2115 2116 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2117 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2118 Address ZeroAddrBound = 2119 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2120 /*Name=*/".bound.zero.addr"); 2121 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2122 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2123 // ThreadId for serialized parallels is 0. 2124 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2125 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2126 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2127 2128 // Ensure we do not inline the function. This is trivially true for the ones 2129 // passed to __kmpc_fork_call but the ones called in serialized regions 2130 // could be inlined. This is not a perfect but it is closer to the invariant 2131 // we want, namely, every data environment starts with a new function. 2132 // TODO: We should pass the if condition to the runtime function and do the 2133 // handling there. Much cleaner code. 2134 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2135 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2136 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2137 2138 // __kmpc_end_serialized_parallel(&Loc, GTid); 2139 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2140 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2141 M, OMPRTL___kmpc_end_serialized_parallel), 2142 EndArgs); 2143 }; 2144 if (IfCond) { 2145 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2146 } else { 2147 RegionCodeGenTy ThenRCG(ThenGen); 2148 ThenRCG(CGF); 2149 } 2150 } 2151 2152 // If we're inside an (outlined) parallel region, use the region info's 2153 // thread-ID variable (it is passed in a first argument of the outlined function 2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2155 // regular serial code region, get thread ID by calling kmp_int32 2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2157 // return the address of that temp. 
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2159 SourceLocation Loc) { 2160 if (auto *OMPRegionInfo = 2161 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2162 if (OMPRegionInfo->getThreadIDVariable()) 2163 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2164 2165 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2166 QualType Int32Ty = 2167 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2168 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2169 CGF.EmitStoreOfScalar(ThreadID, 2170 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2171 2172 return ThreadIDTemp; 2173 } 2174 2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2176 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2177 SmallString<256> Buffer; 2178 llvm::raw_svector_ostream Out(Buffer); 2179 Out << Name; 2180 StringRef RuntimeName = Out.str(); 2181 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2182 if (Elem.second) { 2183 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2184 "OMP internal variable has different type than requested"); 2185 return &*Elem.second; 2186 } 2187 2188 return Elem.second = new llvm::GlobalVariable( 2189 CGM.getModule(), Ty, /*IsConstant*/ false, 2190 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2191 Elem.first(), /*InsertBefore=*/nullptr, 2192 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2193 } 2194 2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2196 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2197 std::string Name = getName({Prefix, "var"}); 2198 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2199 } 2200 2201 namespace { 2202 /// Common pre(post)-action for different OpenMP constructs. 
2203 class CommonActionTy final : public PrePostActionTy { 2204 llvm::FunctionCallee EnterCallee; 2205 ArrayRef<llvm::Value *> EnterArgs; 2206 llvm::FunctionCallee ExitCallee; 2207 ArrayRef<llvm::Value *> ExitArgs; 2208 bool Conditional; 2209 llvm::BasicBlock *ContBlock = nullptr; 2210 2211 public: 2212 CommonActionTy(llvm::FunctionCallee EnterCallee, 2213 ArrayRef<llvm::Value *> EnterArgs, 2214 llvm::FunctionCallee ExitCallee, 2215 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2216 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2217 ExitArgs(ExitArgs), Conditional(Conditional) {} 2218 void Enter(CodeGenFunction &CGF) override { 2219 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2220 if (Conditional) { 2221 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2222 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2223 ContBlock = CGF.createBasicBlock("omp_if.end"); 2224 // Generate the branch (If-stmt) 2225 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2226 CGF.EmitBlock(ThenBlock); 2227 } 2228 } 2229 void Done(CodeGenFunction &CGF) { 2230 // Emit the rest of blocks/branches 2231 CGF.EmitBranch(ContBlock); 2232 CGF.EmitBlock(ContBlock, true); 2233 } 2234 void Exit(CodeGenFunction &CGF) override { 2235 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2236 } 2237 }; 2238 } // anonymous namespace 2239 2240 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2241 StringRef CriticalName, 2242 const RegionCodeGenTy &CriticalOpGen, 2243 SourceLocation Loc, const Expr *Hint) { 2244 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2245 // CriticalOpGen(); 2246 // __kmpc_end_critical(ident_t *, gtid, Lock); 2247 // Prepare arguments and build a call to __kmpc_critical 2248 if (!CGF.HaveInsertPoint()) 2249 return; 2250 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2251 getCriticalRegionLock(CriticalName)}; 2252 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2253 std::end(Args)); 2254 if (Hint) { 2255 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2256 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2257 } 2258 CommonActionTy Action( 2259 OMPBuilder.getOrCreateRuntimeFunction( 2260 CGM.getModule(), 2261 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2262 EnterArgs, 2263 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2264 OMPRTL___kmpc_end_critical), 2265 Args); 2266 CriticalOpGen.setAction(Action); 2267 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2268 } 2269 2270 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2271 const RegionCodeGenTy &MasterOpGen, 2272 SourceLocation Loc) { 2273 if (!CGF.HaveInsertPoint()) 2274 return; 2275 // if(__kmpc_master(ident_t *, gtid)) { 2276 // MasterOpGen(); 2277 // __kmpc_end_master(ident_t *, gtid); 2278 // } 2279 // Prepare arguments and build a call to __kmpc_master 2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2281 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2282 CGM.getModule(), OMPRTL___kmpc_master), 2283 Args, 2284 OMPBuilder.getOrCreateRuntimeFunction( 2285 CGM.getModule(), OMPRTL___kmpc_end_master), 2286 Args, 2287 /*Conditional=*/true); 2288 MasterOpGen.setAction(Action); 2289 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2290 Action.Done(CGF); 2291 } 2292 2293 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2294 const RegionCodeGenTy &MaskedOpGen, 2295 SourceLocation Loc, const Expr *Filter) { 2296 if (!CGF.HaveInsertPoint()) 2297 return; 2298 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2299 // MaskedOpGen(); 2300 // __kmpc_end_masked(iden_t *, gtid); 2301 // } 2302 // Prepare arguments and build a call to __kmpc_masked 2303 llvm::Value *FilterVal = Filter 2304 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2305 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2307 FilterVal}; 2308 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2309 getThreadID(CGF, Loc)}; 2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2311 CGM.getModule(), OMPRTL___kmpc_masked), 2312 Args, 2313 OMPBuilder.getOrCreateRuntimeFunction( 2314 CGM.getModule(), OMPRTL___kmpc_end_masked), 2315 ArgsEnd, 2316 /*Conditional=*/true); 2317 MaskedOpGen.setAction(Action); 2318 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2319 Action.Done(CGF); 2320 } 2321 2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2323 SourceLocation Loc) { 2324 if (!CGF.HaveInsertPoint()) 2325 return; 2326 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2327 OMPBuilder.createTaskyield(CGF.Builder); 2328 } else { 2329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2330 llvm::Value *Args[] = { 2331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2333 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2334 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2335 Args); 2336 } 2337 2338 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2339 Region->emitUntiedSwitch(CGF); 2340 } 2341 2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2343 const RegionCodeGenTy &TaskgroupOpGen, 2344 SourceLocation Loc) { 2345 if (!CGF.HaveInsertPoint()) 2346 return; 2347 // __kmpc_taskgroup(ident_t *, gtid); 2348 // TaskgroupOpGen(); 2349 // __kmpc_end_taskgroup(ident_t *, gtid); 2350 // Prepare arguments and build a call to __kmpc_taskgroup 2351 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2352 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2353 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2354 Args, 2355 
OMPBuilder.getOrCreateRuntimeFunction( 2356 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2357 Args); 2358 TaskgroupOpGen.setAction(Action); 2359 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2360 } 2361 2362 /// Given an array of pointers to variables, project the address of a 2363 /// given variable. 2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2365 unsigned Index, const VarDecl *Var) { 2366 // Pull out the pointer to the variable. 2367 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2368 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2369 2370 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2371 return Address( 2372 CGF.Builder.CreateBitCast( 2373 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2374 ElemTy, CGF.getContext().getDeclAlign(Var)); 2375 } 2376 2377 static llvm::Value *emitCopyprivateCopyFunction( 2378 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2379 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2380 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2381 SourceLocation Loc) { 2382 ASTContext &C = CGM.getContext(); 2383 // void copy_func(void *LHSArg, void *RHSArg); 2384 FunctionArgList Args; 2385 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2386 ImplicitParamDecl::Other); 2387 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 Args.push_back(&LHSArg); 2390 Args.push_back(&RHSArg); 2391 const auto &CGFI = 2392 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2393 std::string Name = 2394 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2396 llvm::GlobalValue::InternalLinkage, Name, 2397 &CGM.getModule()); 2398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2399 
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region. The region body is guarded by __kmpc_single /
/// __kmpc_end_single (conditional action: only one thread executes it). If
/// there are copyprivate variables, a did_it flag records whether this thread
/// was the executor and __kmpc_copyprivate broadcasts the values to the other
/// threads of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (store happens inside the conditional region, i.e. only on
    // the thread that actually executed the single block)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are forwarded into the callee's
    // DestExprs/SrcExprs parameters (in that order) — confirm the naming
    // against the declaration in CGOpenMPRuntime.h before "fixing" it.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region. For 'ordered threads' the region is bracketed by
/// __kmpc_ordered/__kmpc_end_ordered; otherwise it is emitted inline with no
/// runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
                              OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  // Non-'threads' ordered regions need no runtime bracketing.
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flags recorded in the
/// barrier's location info (implicit barrier of for/sections/single, explicit
/// 'barrier', or generic implicit barrier).
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier. Uses the OMPIRBuilder when enabled; otherwise emits
/// __kmpc_cancel_barrier (when the enclosing region supports cancellation,
/// optionally followed by the cancellation-check branch) or plain
/// __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ?
                 OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
/// schedule value. M2 is applied after M1, so if both set a modifier, M2
/// wins. Returns Schedule OR'ed with the resulting modifier bits.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through __kmpc_for_static_init instead, unless the
  // loop is 'ordered'.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the __kmpc_for_static_init_* call for both
/// worksharing loops/sections and 'distribute'.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule ==
                OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing loop or sections directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' directive. On AMDGCN/NVPTX
/// device compilations the GPU-specific distribute entry point is used.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  // 'distribute' has no schedule modifiers.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the matching static-fini call for a statically scheduled construct.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  // Must mirror the init-function choice in emitDistributeStaticInit: GPU
  // device 'distribute' uses the dedicated distribute fini entry point.
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit a __kmpc_dispatch_next_* call; returns an i1 (converted from the
/// runtime's kmp_int32 result) that is nonzero while chunks remain.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Placeholder entry; Addr/ID are filled in later by
  // registerTargetRegionEntryInfo.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for the given (device, file, parent, line)
/// key. Unless IgnoreAddressId is set, an entry that already has an address
/// or ID is treated as "registered" and reported as absent (returns false).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the hasDeviceGlobalVarEntryInfo(VarName) conjunct below is
    // always true at this point (checked just above) — looks redundant;
    // confirm before simplifying.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only fill in a still-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emit one __tgt_offload_entry global (ID, name, size, flags) into the
/// 'omp_offloading_entries' section so the offload linker can collect it.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order so host and device agree on numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the entry's
        // unique (device, file) ID against the source manager's files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
continue; 3306 } 3307 // The vaiable has no definition - no need to add the entry. 3308 if (CE->getVarSize().isZero()) 3309 continue; 3310 break; 3311 } 3312 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3313 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3314 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3315 "Declaret target link address is set."); 3316 if (CGM.getLangOpts().OpenMPIsDevice) 3317 continue; 3318 if (!CE->getAddress()) { 3319 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3320 DiagnosticsEngine::Error, 3321 "Offloading entry for declare target variable is incorrect: the " 3322 "address is invalid."); 3323 CGM.getDiags().Report(DiagID); 3324 continue; 3325 } 3326 break; 3327 } 3328 3329 // Hidden or internal symbols on the device are not externally visible. We 3330 // should not attempt to register them by creating an offloading entry. 3331 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) 3332 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) 3333 continue; 3334 3335 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3336 CE->getVarSize().getQuantity(), Flags, 3337 CE->getLinkage()); 3338 } else { 3339 llvm_unreachable("Unsupported entry kind."); 3340 } 3341 } 3342 } 3343 3344 /// Loads all the offload entries information from the host IR 3345 /// metadata. 3346 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3347 // If we are in target mode, load the metadata from the host IR. This code has 3348 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand of !omp_offload.info describes one offload entry; operand 0
  // encodes the entry kind and selects the layout of the remaining operands.
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function pointer type used by the
/// task runtime entry points. Caches the result in KmpRoutineEntryPtrTy.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry {
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it is a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The runtime expects this record without padding between the fields.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundle describing one privatized variable of a task-based directive:
/// the original variable, the expression referencing it, the private copy
/// and (for firstprivate) the element used to emit the initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // A "local private" was built via the single-VarDecl constructor: only
  // Original is set, all other members stay null.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an OMPAllocateDeclAttr that requests a
/// non-default allocator (i.e. a runtime allocation call is needed).
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // The default allocator with no allocator expression means the default
  // allocation is used - nothing special to do, report non-allocatable.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate alignment attributes from the variable to its field so the
      // privates copy keeps the required alignment.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  // The privates field is only added when there is at least one private
  // variable (createPrivatesRecordDecl returns null otherwise).
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1 (if present) is the privates record; pass null when the task has
  // no privatized variables.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb/ub/st/liter/reductions loaded
  // from the kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk: walks the fields of the privates record
/// and pushes a destroy cleanup for every field with a non-trivial
/// destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
/// *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Argument 0 is the privates record; one extra pointer-to-pointer argument
  // follows for every private/firstprivate/lastprivate/local variable, in
  // that order. PrivateVarsPos records each variable's argument index.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // In optimized builds force-inline the mapping thunk.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local (allocated) privates carry no user-visible initializer.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // A duplication function is required as soon as one private copy is
    // initialized by a non-trivial C++ constructor.
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature of the generated function:
  //   void .omp_task_dup.(kmp_task_t_with_privates *task_dst,
  //                       kmp_task_t_with_privates *task_src, int lastpriv);
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task descriptor:
    // firstprivate copies must be initialized from the originating task's
    // captured data, not from the destination task.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  // NOTE(review): KmpTaskTWithPrivatesQTyRD is currently unused in the body;
  // the decision is made purely from the original variables' types.
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator; filled by the constructor and
  // consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // Emits the loop *headers* for every iterator in \p E: privatized
  // iterator/counter temporaries, "Counter = 0", the "iter.cont" bound check
  // and the "iter.body" entry. The loops are left open; code emitted while
  // this scope is alive runs inside the innermost body, and the destructor
  // closes the loops. A null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the loops opened by the constructor, innermost first: counter
  // increment, back-branch to "iter.cont", then the "iter.exit" block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the base pointer and the size in bytes of the object described by
/// \p E. Handles three forms: an OpenMP array-shaping expression (size is the
/// product of the dimensions), an OpenMP array section (size is computed as
/// upper-bound address + 1 element minus base address), or any other lvalue
/// (size of its static type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // One-past-the-end of the section's last element.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type (base_addr, len, flags),
/// if it is not built yet.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Allocates a kmp_task_t object for directive \p D through the runtime
/// (__kmpc_omp_task_alloc / __kmpc_omp_target_task_alloc), copies the shareds,
/// initializes private copies, and returns the handles (task entry, task
/// descriptor, etc.) needed by the callers that emit the actual task launch.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Decreasing-alignment order; stable to keep source order among privates
  // with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives get a separate cached record type from plain
  // task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates-mapping function for the task (or pass a null pointer
  // of the expected parameter type if the task has no private data).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause with a non-constant
  // condition, stored as a pointer) or a compile-time constant (stored in the
  // PointerIntPair's int).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of iterator upper bounds); plain clauses contribute a
    // compile-time count of list items.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully constant case: emit a fixed-size array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses are indexed by a runtime counter, seeded with
    // the number of elements already filled in above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup function when lastprivates are
    // present or any private copy requires constructor-based initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
4619 case OMPC_DEPEND_out: 4620 case OMPC_DEPEND_inout: 4621 DepKind = DepInOut; 4622 break; 4623 case OMPC_DEPEND_mutexinoutset: 4624 DepKind = DepMutexInOutSet; 4625 break; 4626 case OMPC_DEPEND_inoutset: 4627 DepKind = DepInOutSet; 4628 break; 4629 case OMPC_DEPEND_source: 4630 case OMPC_DEPEND_sink: 4631 case OMPC_DEPEND_depobj: 4632 case OMPC_DEPEND_unknown: 4633 llvm_unreachable("Unknown task dependence type"); 4634 } 4635 return DepKind; 4636 } 4637 4638 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4639 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4640 QualType &FlagsTy) { 4641 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4642 if (KmpDependInfoTy.isNull()) { 4643 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4644 KmpDependInfoRD->startDefinition(); 4645 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4648 KmpDependInfoRD->completeDefinition(); 4649 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4650 } 4651 } 4652 4653 std::pair<llvm::Value *, LValue> 4654 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4655 SourceLocation Loc) { 4656 ASTContext &C = CGM.getContext(); 4657 QualType FlagsTy; 4658 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4659 RecordDecl *KmpDependInfoRD = 4660 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4661 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4662 LValue Base = CGF.EmitLoadOfPointerLValue( 4663 CGF.Builder.CreateElementBitCast( 4664 DepobjLVal.getAddress(CGF), 4665 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), 4666 KmpDependInfoPtrTy->castAs<PointerType>()); 4667 Address DepObjAddr = CGF.Builder.CreateGEP( 4668 Base.getAddress(CGF), 4669 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4670 LValue NumDepsBase = 
CGF.MakeAddrLValue( 4671 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4672 // NumDeps = deps[i].base_addr; 4673 LValue BaseAddrLVal = CGF.EmitLValueForField( 4674 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4675 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4676 return std::make_pair(NumDeps, Base); 4677 } 4678 4679 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4680 llvm::PointerUnion<unsigned *, LValue *> Pos, 4681 const OMPTaskDataTy::DependData &Data, 4682 Address DependenciesArray) { 4683 CodeGenModule &CGM = CGF.CGM; 4684 ASTContext &C = CGM.getContext(); 4685 QualType FlagsTy; 4686 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4687 RecordDecl *KmpDependInfoRD = 4688 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4689 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4690 4691 OMPIteratorGeneratorScope IteratorScope( 4692 CGF, cast_or_null<OMPIteratorExpr>( 4693 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4694 : nullptr)); 4695 for (const Expr *E : Data.DepExprs) { 4696 llvm::Value *Addr; 4697 llvm::Value *Size; 4698 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4699 LValue Base; 4700 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4701 Base = CGF.MakeAddrLValue( 4702 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4703 } else { 4704 LValue &PosLVal = *Pos.get<LValue *>(); 4705 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4706 Base = CGF.MakeAddrLValue( 4707 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4708 } 4709 // deps[i].base_addr = &<Dependencies[i].second>; 4710 LValue BaseAddrLVal = CGF.EmitLValueForField( 4711 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4712 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4713 BaseAddrLVal); 4714 // deps[i].len = sizeof(<Dependencies[i].second>); 4715 LValue LenLVal = CGF.EmitLValueForField( 4716 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4717 CGF.EmitStoreOfScalar(Size, LenLVal); 4718 // deps[i].flags = <Dependencies[i].first>; 4719 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4720 LValue FlagsLVal = CGF.EmitLValueForField( 4721 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4722 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4723 FlagsLVal); 4724 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4725 ++(*P); 4726 } else { 4727 LValue &PosLVal = *Pos.get<LValue *>(); 4728 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4729 Idx = CGF.Builder.CreateNUWAdd(Idx, 4730 llvm::ConstantInt::get(Idx->getType(), 1)); 4731 CGF.EmitStoreOfScalar(Idx, PosLVal); 4732 } 4733 } 4734 } 4735 4736 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( 4737 CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4738 const OMPTaskDataTy::DependData &Data) { 4739 assert(Data.DepKind == OMPC_DEPEND_depobj 
&& 4740 "Expected depobj dependecy kind."); 4741 SmallVector<llvm::Value *, 4> Sizes; 4742 SmallVector<LValue, 4> SizeLVals; 4743 ASTContext &C = CGF.getContext(); 4744 { 4745 OMPIteratorGeneratorScope IteratorScope( 4746 CGF, cast_or_null<OMPIteratorExpr>( 4747 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4748 : nullptr)); 4749 for (const Expr *E : Data.DepExprs) { 4750 llvm::Value *NumDeps; 4751 LValue Base; 4752 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4753 std::tie(NumDeps, Base) = 4754 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4755 LValue NumLVal = CGF.MakeAddrLValue( 4756 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4757 C.getUIntPtrType()); 4758 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4759 NumLVal.getAddress(CGF)); 4760 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4761 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4762 CGF.EmitStoreOfScalar(Add, NumLVal); 4763 SizeLVals.push_back(NumLVal); 4764 } 4765 } 4766 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4767 llvm::Value *Size = 4768 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4769 Sizes.push_back(Size); 4770 } 4771 return Sizes; 4772 } 4773 4774 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4775 QualType &KmpDependInfoTy, 4776 LValue PosLVal, 4777 const OMPTaskDataTy::DependData &Data, 4778 Address DependenciesArray) { 4779 assert(Data.DepKind == OMPC_DEPEND_depobj && 4780 "Expected depobj dependecy kind."); 4781 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4782 { 4783 OMPIteratorGeneratorScope IteratorScope( 4784 CGF, cast_or_null<OMPIteratorExpr>( 4785 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps; (the counter is in elements, not bytes).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

/// Emit the runtime dependence array for all dependencies of a task.
/// Returns the total number of elements plus the array of kmp_depend_info
/// records (cast to void*), or {nullptr, invalid} when there are no
/// dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically known dependency count: depobj clauses and iterator-based
  // clauses contribute a runtime-computed count instead (below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator contributes <upper bound> * <deps per iteration>
      // elements.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at runtime: emit the sum and use a
    // VLA for the dependence array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a constant-sized stack array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Emit regular, non-iterator dependencies first, at constant positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  // Continue filling from the runtime counter, starting where the constant
  // positions left off.
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Allocate (via __kmpc_alloc) and fill the dependence array backing a
/// 'depobj' object. The first array element stores the number of
/// dependencies; the returned address points past it, at the first real
/// kmp_depend_info record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator upper bounds, plus one element
    // for the size header.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    // Iterator-generated deps: track the write position in memory, starting
    // at 1 to skip the size header.
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record, past the size header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

/// Emit code for 'depobj(x) destroy': free the dependence array the depobj
/// points to (including its size header element) via __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back one element: the allocation starts at the size header stored
  // just before the first dependence record.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emit code for 'depobj(x) update(kind)': rewrite the flags field of every
/// kmp_depend_info record stored in the depobj to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; incoming from the entry block and
  // from the loop back-edge added below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit a '#pragma omp task' call: set up dependences, then either enqueue
/// the task via __kmpc_omp_task[_with_deps] (then-branch), or — when the
/// 'if' clause evaluates to false — wait on the dependences and run the task
/// body inline between task_begin_if0/task_complete_if0 (else-branch).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part_id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause is false): execute the task body immediately on
  // this thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit a '#pragma omp taskloop' call via __kmpc_taskloop, initializing the
/// task's lower bound, upper bound, stride and reductions fields first.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lb/ub/st fields from the loop-bound helper
  // variables of the directive.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against zero-length arrays: skip the loop entirely when empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so
  // the generated combiner operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A UDR combiner appears as a call through an OpaqueValueExpr that wraps a
  // DeclRefExpr to the OMPDeclareReductionDecl; map the opaque callee to the
  // concrete combiner function before emitting the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the outlined 'void reduction_func(void *LHSArg, void *RHSArg)' that
/// the runtime calls to combine two arrays of per-variable pointers by
/// applying each reduction operation element-wise.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to the corresponding slot of the pointer
  // arrays; VLA privates occupy an extra slot holding the array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit a single reduction combination: element-wise for array sections,
/// directly otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //   *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  // ...
5556 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5557 // ... 5558 5559 ASTContext &C = CGM.getContext(); 5560 5561 if (SimpleReduction) { 5562 CodeGenFunction::RunCleanupsScope Scope(CGF); 5563 const auto *IPriv = Privates.begin(); 5564 const auto *ILHS = LHSExprs.begin(); 5565 const auto *IRHS = RHSExprs.begin(); 5566 for (const Expr *E : ReductionOps) { 5567 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5568 cast<DeclRefExpr>(*IRHS)); 5569 ++IPriv; 5570 ++ILHS; 5571 ++IRHS; 5572 } 5573 return; 5574 } 5575 5576 // 1. Build a list of reduction variables. 5577 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5578 auto Size = RHSExprs.size(); 5579 for (const Expr *E : Privates) { 5580 if (E->getType()->isVariablyModifiedType()) 5581 // Reserve place for array size. 5582 ++Size; 5583 } 5584 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5585 QualType ReductionArrayTy = 5586 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5587 /*IndexTypeQuals=*/0); 5588 Address ReductionList = 5589 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5590 const auto *IPriv = Privates.begin(); 5591 unsigned Idx = 0; 5592 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5593 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5594 CGF.Builder.CreateStore( 5595 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5596 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5597 Elem); 5598 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5599 // Store array size. 
5600 ++Idx; 5601 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5602 llvm::Value *Size = CGF.Builder.CreateIntCast( 5603 CGF.getVLASize( 5604 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5605 .NumElts, 5606 CGF.SizeTy, /*isSigned=*/false); 5607 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5608 Elem); 5609 } 5610 } 5611 5612 // 2. Emit reduce_func(). 5613 llvm::Function *ReductionFn = 5614 emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5615 Privates, LHSExprs, RHSExprs, ReductionOps); 5616 5617 // 3. Create static kmp_critical_name lock = { 0 }; 5618 std::string Name = getName({"reduction"}); 5619 llvm::Value *Lock = getCriticalRegionLock(Name); 5620 5621 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5622 // RedList, reduce_func, &<lock>); 5623 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5624 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5625 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5626 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5627 ReductionList.getPointer(), CGF.VoidPtrTy); 5628 llvm::Value *Args[] = { 5629 IdentTLoc, // ident_t *<loc> 5630 ThreadId, // i32 <gtid> 5631 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5632 ReductionArrayTySize, // size_type sizeof(RedList) 5633 RL, // void *RedList 5634 ReductionFn, // void (*) (void *, void *) <reduce_func> 5635 Lock // kmp_critical_name *&<lock> 5636 }; 5637 llvm::Value *Res = CGF.EmitRuntimeCall( 5638 OMPBuilder.getOrCreateRuntimeFunction( 5639 CGM.getModule(), 5640 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5641 Args); 5642 5643 // 5. Build switch(res) 5644 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5645 llvm::SwitchInst *SwInst = 5646 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5647 5648 // 6. Build case 1: 5649 // ... 
5650 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5651 // ... 5652 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5653 // break; 5654 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5655 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5656 CGF.EmitBlock(Case1BB); 5657 5658 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5659 llvm::Value *EndArgs[] = { 5660 IdentTLoc, // ident_t *<loc> 5661 ThreadId, // i32 <gtid> 5662 Lock // kmp_critical_name *&<lock> 5663 }; 5664 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5665 CodeGenFunction &CGF, PrePostActionTy &Action) { 5666 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5667 const auto *IPriv = Privates.begin(); 5668 const auto *ILHS = LHSExprs.begin(); 5669 const auto *IRHS = RHSExprs.begin(); 5670 for (const Expr *E : ReductionOps) { 5671 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5672 cast<DeclRefExpr>(*IRHS)); 5673 ++IPriv; 5674 ++ILHS; 5675 ++IRHS; 5676 } 5677 }; 5678 RegionCodeGenTy RCG(CodeGen); 5679 CommonActionTy Action( 5680 nullptr, llvm::None, 5681 OMPBuilder.getOrCreateRuntimeFunction( 5682 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5683 : OMPRTL___kmpc_end_reduce), 5684 EndArgs); 5685 RCG.setAction(Action); 5686 RCG(CGF); 5687 5688 CGF.EmitBranch(DefaultBB); 5689 5690 // 7. Build case 2: 5691 // ... 5692 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5693 // ... 
5694 // break; 5695 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5696 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5697 CGF.EmitBlock(Case2BB); 5698 5699 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5700 CodeGenFunction &CGF, PrePostActionTy &Action) { 5701 const auto *ILHS = LHSExprs.begin(); 5702 const auto *IRHS = RHSExprs.begin(); 5703 const auto *IPriv = Privates.begin(); 5704 for (const Expr *E : ReductionOps) { 5705 const Expr *XExpr = nullptr; 5706 const Expr *EExpr = nullptr; 5707 const Expr *UpExpr = nullptr; 5708 BinaryOperatorKind BO = BO_Comma; 5709 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5710 if (BO->getOpcode() == BO_Assign) { 5711 XExpr = BO->getLHS(); 5712 UpExpr = BO->getRHS(); 5713 } 5714 } 5715 // Try to emit update expression as a simple atomic. 5716 const Expr *RHSExpr = UpExpr; 5717 if (RHSExpr) { 5718 // Analyze RHS part of the whole expression. 5719 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5720 RHSExpr->IgnoreParenImpCasts())) { 5721 // If this is a conditional operator, analyze its condition for 5722 // min/max reduction operator. 
5723 RHSExpr = ACO->getCond(); 5724 } 5725 if (const auto *BORHS = 5726 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5727 EExpr = BORHS->getRHS(); 5728 BO = BORHS->getOpcode(); 5729 } 5730 } 5731 if (XExpr) { 5732 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5733 auto &&AtomicRedGen = [BO, VD, 5734 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5735 const Expr *EExpr, const Expr *UpExpr) { 5736 LValue X = CGF.EmitLValue(XExpr); 5737 RValue E; 5738 if (EExpr) 5739 E = CGF.EmitAnyExpr(EExpr); 5740 CGF.EmitOMPAtomicSimpleUpdateExpr( 5741 X, E, BO, /*IsXLHSInRHSPart=*/true, 5742 llvm::AtomicOrdering::Monotonic, Loc, 5743 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5744 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5745 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5746 CGF.emitOMPSimpleStore( 5747 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5748 VD->getType().getNonReferenceType(), Loc); 5749 PrivateScope.addPrivate(VD, LHSTemp); 5750 (void)PrivateScope.Privatize(); 5751 return CGF.EmitAnyExpr(UpExpr); 5752 }); 5753 }; 5754 if ((*IPriv)->getType()->isArrayType()) { 5755 // Emit atomic reduction for array section. 5756 const auto *RHSVar = 5757 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5758 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5759 AtomicRedGen, XExpr, EExpr, UpExpr); 5760 } else { 5761 // Emit atomic reduction for array subscript or single variable. 5762 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5763 } 5764 } else { 5765 // Emit as a critical region. 
5766 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5767 const Expr *, const Expr *) { 5768 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5769 std::string Name = RT.getName({"atomic_reduction"}); 5770 RT.emitCriticalRegion( 5771 CGF, Name, 5772 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5773 Action.Enter(CGF); 5774 emitReductionCombiner(CGF, E); 5775 }, 5776 Loc); 5777 }; 5778 if ((*IPriv)->getType()->isArrayType()) { 5779 const auto *LHSVar = 5780 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5781 const auto *RHSVar = 5782 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5783 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5784 CritRedGen); 5785 } else { 5786 CritRedGen(CGF, nullptr, nullptr, nullptr); 5787 } 5788 } 5789 ++ILHS; 5790 ++IRHS; 5791 ++IPriv; 5792 } 5793 }; 5794 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5795 if (!WithNowait) { 5796 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5797 llvm::Value *EndArgs[] = { 5798 IdentTLoc, // ident_t *<loc> 5799 ThreadId, // i32 <gtid> 5800 Lock // kmp_critical_name *&<lock> 5801 }; 5802 CommonActionTy Action(nullptr, llvm::None, 5803 OMPBuilder.getOrCreateRuntimeFunction( 5804 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5805 EndArgs); 5806 AtomicRCG.setAction(Action); 5807 AtomicRCG(CGF); 5808 } else { 5809 AtomicRCG(CGF); 5810 } 5811 5812 CGF.EmitBranch(DefaultBB); 5813 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5814 } 5815 5816 /// Generates unique name for artificial threadprivate variables. 5817 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    // No base decl found; Ref must itself be a direct variable reference.
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals and parameters have no mangled name; use the plain identifier.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // Append the raw source-location encoding to disambiguate same-named decls.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // restrict-qualify the void* parameters — presumably the runtime guarantees
  // the private and original pointers do not alias; TODO confirm.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: pointer to the private copy; %orig: pointer to the original item.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  // %0 = bitcast void* %arg to <type>*, then load the private item address.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static
llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5912 SourceLocation Loc, 5913 ReductionCodeGen &RCG, unsigned N, 5914 const Expr *ReductionOp, 5915 const Expr *LHS, const Expr *RHS, 5916 const Expr *PrivateRef) { 5917 ASTContext &C = CGM.getContext(); 5918 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5919 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5920 FunctionArgList Args; 5921 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5922 C.VoidPtrTy, ImplicitParamDecl::Other); 5923 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5924 ImplicitParamDecl::Other); 5925 Args.emplace_back(&ParamInOut); 5926 Args.emplace_back(&ParamIn); 5927 const auto &FnInfo = 5928 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5929 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5930 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5931 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5932 Name, &CGM.getModule()); 5933 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5934 Fn->setDoesNotRecurse(); 5935 CodeGenFunction CGF(CGM); 5936 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5937 llvm::Value *Size = nullptr; 5938 // If the size of the reduction item is non-constant, load it from global 5939 // threadprivate variable. 5940 if (RCG.getSizes(N).second) { 5941 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5942 CGF, CGM.getContext().getSizeType(), 5943 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5944 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5945 CGM.getContext().getSizeType(), Loc); 5946 } 5947 RCG.emitAggregateType(CGF, N, Size); 5948 // Remap lhs and rhs variables to the addresses of the function arguments. 
5949 // %lhs = bitcast void* %arg0 to <type>* 5950 // %rhs = bitcast void* %arg1 to <type>* 5951 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5952 PrivateScope.addPrivate( 5953 LHSVD, 5954 // Pull out the pointer to the variable. 5955 CGF.EmitLoadOfPointer( 5956 CGF.Builder.CreateElementBitCast( 5957 CGF.GetAddrOfLocalVar(&ParamInOut), 5958 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5959 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5960 PrivateScope.addPrivate( 5961 RHSVD, 5962 // Pull out the pointer to the variable. 5963 CGF.EmitLoadOfPointer( 5964 CGF.Builder.CreateElementBitCast( 5965 CGF.GetAddrOfLocalVar(&ParamIn), 5966 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5967 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5968 PrivateScope.Privatize(); 5969 // Emit the combiner body: 5970 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5971 // store <type> %2, <type>* %lhs 5972 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5973 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5974 cast<DeclRefExpr>(RHS)); 5975 CGF.FinishFunction(); 5976 return Fn; 5977 } 5978 5979 /// Emits reduction finalizer function: 5980 /// \code 5981 /// void @.red_fini(void* %arg) { 5982 /// %0 = bitcast void* %arg to <type>* 5983 /// <destroy>(<type>* %0) 5984 /// ret void 5985 /// } 5986 /// \endcode 5987 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5988 SourceLocation Loc, 5989 ReductionCodeGen &RCG, unsigned N) { 5990 if (!RCG.needCleanups(N)) 5991 return nullptr; 5992 ASTContext &C = CGM.getContext(); 5993 FunctionArgList Args; 5994 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5995 ImplicitParamDecl::Other); 5996 Args.emplace_back(&Param); 5997 const auto &FnInfo = 5998 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5999 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6000 std::string Name = 
CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6001 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6002 Name, &CGM.getModule()); 6003 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6004 Fn->setDoesNotRecurse(); 6005 CodeGenFunction CGF(CGM); 6006 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6007 Address PrivateAddr = CGF.EmitLoadOfPointer( 6008 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 6009 llvm::Value *Size = nullptr; 6010 // If the size of the reduction item is non-constant, load it from global 6011 // threadprivate variable. 6012 if (RCG.getSizes(N).second) { 6013 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6014 CGF, CGM.getContext().getSizeType(), 6015 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6016 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6017 CGM.getContext().getSizeType(), Loc); 6018 } 6019 RCG.emitAggregateType(CGF, N, Size); 6020 // Emit the finalizer body: 6021 // <destroy>(<type>* %0) 6022 RCG.emitCleanups(CGF, N, PrivateAddr); 6023 CGF.FinishFunction(Loc); 6024 return Fn; 6025 } 6026 6027 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6028 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6029 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6030 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6031 return nullptr; 6032 6033 // Build typedef struct: 6034 // kmp_taskred_input { 6035 // void *reduce_shar; // shared reduction item 6036 // void *reduce_orig; // original reduction item used for initialization 6037 // size_t reduce_size; // size of data item 6038 // void *reduce_init; // data initialization routine 6039 // void *reduce_fini; // data finalization routine 6040 // void *reduce_comb; // data combiner routine 6041 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6042 // } kmp_taskred_input_t; 6043 ASTContext 
&C = CGM.getContext(); 6044 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6045 RD->startDefinition(); 6046 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6047 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6048 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6049 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6050 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6051 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6052 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6053 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6054 RD->completeDefinition(); 6055 QualType RDType = C.getRecordType(RD); 6056 unsigned Size = Data.ReductionVars.size(); 6057 llvm::APInt ArraySize(/*numBits=*/64, Size); 6058 QualType ArrayRDType = C.getConstantArrayType( 6059 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6060 // kmp_task_red_input_t .rd_input.[Size]; 6061 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6062 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6063 Data.ReductionCopies, Data.ReductionOps); 6064 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6065 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6066 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6067 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6068 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6069 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6070 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6071 ".rd_input.gep."); 6072 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6073 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6074 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6075 RCG.emitSharedOrigLValue(CGF, Cnt); 6076 llvm::Value *CastedShared = 6077 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6078 
CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6079 // ElemLVal.reduce_orig = &Origs[Cnt]; 6080 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6081 llvm::Value *CastedOrig = 6082 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6083 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6084 RCG.emitAggregateType(CGF, Cnt); 6085 llvm::Value *SizeValInChars; 6086 llvm::Value *SizeVal; 6087 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6088 // We use delayed creation/initialization for VLAs and array sections. It is 6089 // required because runtime does not provide the way to pass the sizes of 6090 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6091 // threadprivate global variables are used to store these values and use 6092 // them in the functions. 6093 bool DelayedCreation = !!SizeVal; 6094 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6095 /*isSigned=*/false); 6096 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6097 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6098 // ElemLVal.reduce_init = init; 6099 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6100 llvm::Value *InitAddr = 6101 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6102 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6103 // ElemLVal.reduce_fini = fini; 6104 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6105 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6106 llvm::Value *FiniAddr = Fini 6107 ? 
CGF.EmitCastToVoidPtr(Fini) 6108 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6109 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6110 // ElemLVal.reduce_comb = comb; 6111 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6112 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6113 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6114 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6115 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6116 // ElemLVal.flags = 0; 6117 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6118 if (DelayedCreation) { 6119 CGF.EmitStoreOfScalar( 6120 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6121 FlagsLVal); 6122 } else 6123 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6124 FlagsLVal.getType()); 6125 } 6126 if (Data.IsReductionWithTaskMod) { 6127 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6128 // is_ws, int num, void *data); 6129 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6130 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6131 CGM.IntTy, /*isSigned=*/true); 6132 llvm::Value *Args[] = { 6133 IdentTLoc, GTid, 6134 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the runtime call that finalizes a reduction that used the 'task'
/// reduction modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  // Result intentionally discarded.
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    // Non-constant size: publish it via an artificial threadprivate variable
    // so the generated init/comb/fini functions can load it back.
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the current thread's private copy of a task
/// reduction item, obtained from the runtime.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // Wrap the returned opaque pointer with the shared item's alignment.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Use the OpenMPIRBuilder path only when it is enabled and the taskwait has
  // no dependences.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize any 'depend' clauses as a runtime dependence array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependence list is passed: count 0, null pointer.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // If inside an OpenMP region, emit the untied-task switch point after the
  // taskwait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits code for a directive that is generated inline in the current
/// function rather than outlined into a separate one.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Last flag is true for all kinds except critical/master/masked — see
  // InlinedOpenMPRegionRAII for its meaning.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind codes passed to the __kmpc_cancel* runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps a cancellable OpenMP directive kind to the runtime cancel-kind code.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    // The only remaining cancellable region is taskgroup.
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // 'then' branch of the optional if-clause: emit the actual cancel.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the if-clause condition; else branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// PrePostActionTy that initializes all user-declared allocators from
/// 'uses_allocators' clauses on region entry and destroys them on exit.
/// Each pair is (allocator expr, allocator-traits expr).
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Emit one __kmpc_init_allocator per (allocator, traits) pair.
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Tear down in declaration order (not reversed).
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emit the outlined function for a target region, wiring up
/// 'uses_allocators' init/fini actions around the region body, then delegate
/// to emitTargetOutlinedFunctionHelper.
/// \param OutlinedFn [out] the generated function.
/// \param OutlinedFnID [out] the region ID used by the offloading runtime.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE(review): this local 'D' shadows the directive parameter 'D' for
      // the remainder of the loop body — intentional here but easy to misread.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits need no runtime initialization.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emit the call to __kmpc_init_allocator for one 'uses_allocators' entry and
/// store the returned allocator handle into the allocator variable.
/// \param Allocator DeclRefExpr naming the allocator variable.
/// \param AllocatorTraits constant-array-typed expression of allocator traits.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void* for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* handle to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit the call to __kmpc_destroy_allocator for one 'uses_allocators'
/// allocator: load the handle stored by emitUsesAllocatorsInit and pass it
/// (converted back to void*) to the runtime.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Generate (and, for offload entries, register) the outlined function for a
/// target region. Continues below.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // Host fallback is skipped only when offload is mandatory on the host side;
  // on the device the outlined function is always built.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN kernels need the dedicated kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a dummy constant byte serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // when a positive compile-time constant could be derived from the clauses.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Peel compound statements and "ignorable" statements off \p Body and return
/// the single interesting child statement, or nullptr when there are several.
/// Used to detect the single nested directive inside a 'target' region.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant, no side effects) do not count as a
      // child.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      // A DeclStmt is ignorable when every declaration in it is ignorable:
      // type-level/pragma/OpenMP declarative decls, or variables that are
      // global or never used.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap containers around the single child and iterate in case it is
    // itself a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Return the num_teams clause expression for a target-based directive (or
/// nullptr) and set \p DefaultVal to the compile-time constant team count
/// when one is known: 0 = "runtime default", 1 = single team, -1 = "no teams
/// region needed".
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // A bare 'target': inspect the (single) nested directive, if any, to find
    // a teams construct carrying num_teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Teams without num_teams: runtime decides.
        DefaultVal = 0;
        return nullptr;
      }
      // NOTE(review): this branch and the fallthrough below are identical
      // (DefaultVal = 1; return nullptr;) — the parallel/simd test is
      // redundant as written; kept byte-identical here.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the clause, if present, lives on this directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    DefaultVal = 1;
    return nullptr;
  // Every remaining directive kind is not a target execution directive and is
  // rejected by the assert above; listed so the switch stays covered.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Emit the host-side i32 number-of-teams value for a target directive, or
/// nullptr when no teams region is needed (DefaultNT stayed -1 and no
/// num_teams expression exists).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The expression comes from a nested teams directive, so it must be
      // emitted in the inner region's captured-statement context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Clause sits directly on the combined directive; a plain cleanup scope
      // is enough.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region needed.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

/// Compute the number of threads for the (single) directive nested inside the
/// captured statement \p CS: handles the parallel directive's 'if' and
/// 'num_threads' clauses, clamped against \p DefaultThreadLimitVal when that
/// is non-null. Returns getInt32(1) for an inner simd, and
/// DefaultThreadLimitVal (or 0 meaning "runtime default") otherwise.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if-clause that applies to 'parallel' (unmodified or
        // parallel-modified).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false if-clause: the parallel region runs with one
            // thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Emit the clause's pre-init declarations, then the condition.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate without initialization for capture-no-init vars.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Return the thread_limit/num_threads clause expression that bounds the
/// thread count of a target-based directive (or nullptr) and set
/// \p DefaultVal to the compile-time constant bound when one is known
/// (1 for simd-only targets).
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both clauses may appear; the effective bound is the smaller constant,
    // and the returned expression tracks whichever won.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit constant was found DefaultVal
          // is still the caller's initial value (-1 from the helper's
          // callers), so this comparison then keeps DefaultVal — confirm the
          // intended interaction if callers ever pass other initial values.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // Non-target directive kinds: rejected by the assert above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

/// Emit the host-side i32 thread-count value for a target directive,
/// combining thread_limit / num_threads / 'if' clauses and any nested
/// directives' clauses. Returns getInt32(0) for "runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Bare 'target': walk into nested directives for their clauses.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a teams (non-distribute) directive to reach the
      // distribute nested inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit = min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // Non-target directive kinds: rejected by the assert above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
7275 OMP_MAP_RETURN_PARAM = 0x40, 7276 /// This flag signals that the reference being passed is a pointer to 7277 /// private data. 7278 OMP_MAP_PRIVATE = 0x80, 7279 /// Pass the element to the device by value. 7280 OMP_MAP_LITERAL = 0x100, 7281 /// Implicit map 7282 OMP_MAP_IMPLICIT = 0x200, 7283 /// Close is a hint to the runtime to allocate memory close to 7284 /// the target device. 7285 OMP_MAP_CLOSE = 0x400, 7286 /// 0x800 is reserved for compatibility with XLC. 7287 /// Produce a runtime error if the data is not already allocated. 7288 OMP_MAP_PRESENT = 0x1000, 7289 // Increment and decrement a separate reference counter so that the data 7290 // cannot be unmapped within the associated region. Thus, this flag is 7291 // intended to be used on 'target' and 'target data' directives because they 7292 // are inherently structured. It is not intended to be used on 'target 7293 // enter data' and 'target exit data' directives because they are inherently 7294 // dynamic. 7295 // This is an OpenMP extension for the sake of OpenACC support. 7296 OMP_MAP_OMPX_HOLD = 0x2000, 7297 /// Signal that the runtime library should use args as an array of 7298 /// descriptor_dim pointers and use args_size as dims. Used when we have 7299 /// non-contiguous list items in target update directive 7300 OMP_MAP_NON_CONTIG = 0x100000000000, 7301 /// The 16 MSBs of the flags indicate whether the entry is member of some 7302 /// struct/class. 7303 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7304 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7305 }; 7306 7307 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7308 static unsigned getFlagMemberOffset() { 7309 unsigned Offset = 0; 7310 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7311 Remain = Remain >> 1) 7312 Offset++; 7313 return Offset; 7314 } 7315 7316 /// Class that holds debugging information for a data mapping to be passed to 7317 /// the runtime library. 
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference to obtain the raw base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  // Small-vector aliases used to accumulate the per-entry map information
  // (one element per generated map entry).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    /// Per-dimension bookkeeping for non-contiguous mappings (one inner array
    /// per map entry).
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    // The parallel arrays below are indexed by map-entry position; they are
    // kept in lockstep by append().
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo. Elements are copied; \a CurInfo itself is
    /// left unchanged.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression from the use_device_ptr/use_device_addr clause.
    const Expr *IE = nullptr;
    /// The declaration the deferred entry is for.
    const ValueDecl *VD = nullptr;
    /// True if this entry came from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Return a runtime llvm::Value holding the number of bytes to map for
  /// expression \a E. Array shaping expressions and array sections are
  /// handled specially; for everything else the size of the expression's
  /// (non-reference) type is used.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: the pointee size
    // multiplied by each (runtime-evaluated) dimension.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound
      // is not specified too, that means we are using the whole length of the
      // base, i.e. array_section[:].
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element, i.e. array_section[lb].
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * sizeof(elem).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem); the select below clamps the
      // result at zero so the unsigned subtraction cannot wrap.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    // 'present' can come from either a map-type modifier or a motion
    // (to/from) clause modifier.
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
7648 void generateInfoForComponentList( 7649 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7650 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7651 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7652 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7653 bool IsFirstComponentList, bool IsImplicit, 7654 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7655 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7656 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7657 OverlappedElements = llvm::None) const { 7658 // The following summarizes what has to be generated for each map and the 7659 // types below. The generated information is expressed in this order: 7660 // base pointer, section pointer, size, flags 7661 // (to add to the ones that come from the map type and modifier). 7662 // 7663 // double d; 7664 // int i[100]; 7665 // float *p; 7666 // 7667 // struct S1 { 7668 // int i; 7669 // float f[50]; 7670 // } 7671 // struct S2 { 7672 // int i; 7673 // float f[50]; 7674 // S1 s; 7675 // double *p; 7676 // struct S2 *ps; 7677 // int &ref; 7678 // } 7679 // S2 s; 7680 // S2 *ps; 7681 // 7682 // map(d) 7683 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7684 // 7685 // map(i) 7686 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7687 // 7688 // map(i[1:23]) 7689 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7690 // 7691 // map(p) 7692 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7693 // 7694 // map(p[1:24]) 7695 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7696 // in unified shared memory mode or for local pointers 7697 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7698 // 7699 // map(s) 7700 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7701 // 7702 // map(s.i) 7703 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7704 // 7705 // map(s.s.f) 7706 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7707 // 7708 // map(s.p) 7709 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7710 // 7711 // map(to: s.p[:22]) 7712 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7713 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7714 // &(s.p), &(s.p[0]), 22*sizeof(double), 7715 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7716 // (*) alloc space for struct members, only this is a target parameter 7717 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7718 // optimizes this entry out, same in the examples below) 7719 // (***) map the pointee (map: to) 7720 // 7721 // map(to: s.ref) 7722 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7723 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7724 // (*) alloc space for struct members, only this is a target parameter 7725 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7726 // optimizes this entry out, same in the examples below) 7727 // (***) map the pointee (map: to) 7728 // 7729 // map(s.ps) 7730 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7731 // 7732 // map(from: s.ps->s.i) 7733 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7734 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7735 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7736 // 7737 // map(to: s.ps->ps) 7738 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7739 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7740 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7741 // 7742 // map(s.ps->ps->ps) 7743 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7744 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7745 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7746 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7747 // 7748 // map(to: s.ps->ps->s.f[:22]) 7749 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7750 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7751 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7752 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7753 // 7754 // map(ps) 7755 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7756 // 7757 // map(ps->i) 7758 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7759 // 7760 // map(ps->s.f) 7761 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7762 // 7763 // map(from: ps->p) 7764 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7765 // 7766 // map(to: ps->p[:22]) 7767 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7768 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7769 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7770 // 7771 // map(ps->ps) 7772 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7773 // 7774 // map(from: ps->ps->s.i) 7775 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7776 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7777 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7778 // 7779 // map(from: ps->ps->ps) 7780 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7781 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7782 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7783 // 7784 // map(ps->ps->ps->ps) 7785 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7786 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7787 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7788 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7789 // 7790 // map(to: ps->ps->ps->s.f[:22]) 7791 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7792 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7793 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7794 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7795 // 7796 // map(to: s.f[:22]) map(from: s.p[:33]) 7797 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7798 // sizeof(double*) (**), TARGET_PARAM 7799 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7800 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7801 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7802 // (*) allocate contiguous space needed to fit all mapped members even if 7803 // we allocate space for members not mapped (in this example, 7804 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7805 // them as well because they fall between &s.f[0] and &s.p) 7806 // 7807 // map(from: s.f[:22]) map(to: ps->p[:33]) 7808 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7809 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7810 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7811 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7812 // (*) the struct this entry pertains to is the 2nd element in the list of 7813 // arguments, hence MEMBER_OF(2) 7814 // 7815 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7816 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7817 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7818 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7819 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7820 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7821 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7822 // (*) the struct this entry pertains to is the 4th element in the list 7823 // of arguments, hence MEMBER_OF(4) 7824 7825 // Track if the map information being generated is the first for a capture. 7826 bool IsCaptureFirstInfo = IsFirstComponentList; 7827 // When the variable is on a declare target link or in a to clause with 7828 // unified memory, a reference is needed to hold the host/device address 7829 // of the variable. 7830 bool RequiresReference = false; 7831 7832 // Scan the components from the base to the complete expression. 
7833 auto CI = Components.rbegin(); 7834 auto CE = Components.rend(); 7835 auto I = CI; 7836 7837 // Track if the map information being generated is the first for a list of 7838 // components. 7839 bool IsExpressionFirstInfo = true; 7840 bool FirstPointerInComplexData = false; 7841 Address BP = Address::invalid(); 7842 const Expr *AssocExpr = I->getAssociatedExpression(); 7843 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7844 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7845 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7846 7847 if (isa<MemberExpr>(AssocExpr)) { 7848 // The base is the 'this' pointer. The content of the pointer is going 7849 // to be the base of the field being mapped. 7850 BP = CGF.LoadCXXThisAddress(); 7851 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7852 (OASE && 7853 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7854 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7855 } else if (OAShE && 7856 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7857 BP = Address( 7858 CGF.EmitScalarExpr(OAShE->getBase()), 7859 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7860 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7861 } else { 7862 // The base is the reference to the variable. 7863 // BP = &Var. 
7864 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7865 if (const auto *VD = 7866 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7867 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7868 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7869 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7870 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7871 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7872 RequiresReference = true; 7873 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7874 } 7875 } 7876 } 7877 7878 // If the variable is a pointer and is being dereferenced (i.e. is not 7879 // the last component), the base has to be the pointer itself, not its 7880 // reference. References are ignored for mapping purposes. 7881 QualType Ty = 7882 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7883 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7884 // No need to generate individual map information for the pointer, it 7885 // can be associated with the combined storage if shared memory mode is 7886 // active or the base declaration is not global variable. 7887 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7888 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7889 !VD || VD->hasLocalStorage()) 7890 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7891 else 7892 FirstPointerInComplexData = true; 7893 ++I; 7894 } 7895 } 7896 7897 // Track whether a component of the list should be marked as MEMBER_OF some 7898 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7899 // in a component list should be marked as MEMBER_OF, all subsequent entries 7900 // do not belong to the base struct. E.g. 7901 // struct S2 s; 7902 // s.ps->ps->ps->f[:] 7903 // (1) (2) (3) (4) 7904 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7905 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. 
ps(3) 7906 // is the pointee of ps(2) which is not member of struct s, so it should not 7907 // be marked as such (it is still PTR_AND_OBJ). 7908 // The variable is initialized to false so that PTR_AND_OBJ entries which 7909 // are not struct members are not considered (e.g. array of pointers to 7910 // data). 7911 bool ShouldBeMemberOf = false; 7912 7913 // Variable keeping track of whether or not we have encountered a component 7914 // in the component list which is a member expression. Useful when we have a 7915 // pointer or a final array section, in which case it is the previous 7916 // component in the list which tells us whether we have a member expression. 7917 // E.g. X.f[:] 7918 // While processing the final array section "[:]" it is "f" which tells us 7919 // whether we are dealing with a member of a declared struct. 7920 const MemberExpr *EncounteredME = nullptr; 7921 7922 // Track for the total number of dimension. Start from one for the dummy 7923 // dimension. 7924 uint64_t DimSize = 1; 7925 7926 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7927 bool IsPrevMemberReference = false; 7928 7929 for (; I != CE; ++I) { 7930 // If the current component is member of a struct (parent struct) mark it. 7931 if (!EncounteredME) { 7932 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7933 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7934 // as MEMBER_OF the parent struct. 7935 if (EncounteredME) { 7936 ShouldBeMemberOf = true; 7937 // Do not emit as complex pointer if this is actually not array-like 7938 // expression. 
7939 if (FirstPointerInComplexData) { 7940 QualType Ty = std::prev(I) 7941 ->getAssociatedDeclaration() 7942 ->getType() 7943 .getNonReferenceType(); 7944 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7945 FirstPointerInComplexData = false; 7946 } 7947 } 7948 } 7949 7950 auto Next = std::next(I); 7951 7952 // We need to generate the addresses and sizes if this is the last 7953 // component, if the component is a pointer or if it is an array section 7954 // whose length can't be proved to be one. If this is a pointer, it 7955 // becomes the base address for the following components. 7956 7957 // A final array section, is one whose length can't be proved to be one. 7958 // If the map item is non-contiguous then we don't treat any array section 7959 // as final array section. 7960 bool IsFinalArraySection = 7961 !IsNonContiguous && 7962 isFinalArraySectionExpression(I->getAssociatedExpression()); 7963 7964 // If we have a declaration for the mapping use that, otherwise use 7965 // the base declaration of the map clause. 7966 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7967 ? I->getAssociatedDeclaration() 7968 : BaseDecl; 7969 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7970 : MapExpr; 7971 7972 // Get information on whether the element is a pointer. Have to do a 7973 // special treatment for array sections given that they are built-in 7974 // types. 
7975 const auto *OASE = 7976 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7977 const auto *OAShE = 7978 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7979 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7980 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7981 bool IsPointer = 7982 OAShE || 7983 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7984 .getCanonicalType() 7985 ->isAnyPointerType()) || 7986 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7987 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7988 MapDecl && 7989 MapDecl->getType()->isLValueReferenceType(); 7990 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7991 7992 if (OASE) 7993 ++DimSize; 7994 7995 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7996 IsFinalArraySection) { 7997 // If this is not the last component, we expect the pointer to be 7998 // associated with an array expression or member expression. 7999 assert((Next == CE || 8000 isa<MemberExpr>(Next->getAssociatedExpression()) || 8001 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8002 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8003 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8004 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8005 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8006 "Unexpected expression"); 8007 8008 Address LB = Address::invalid(); 8009 Address LowestElem = Address::invalid(); 8010 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8011 const MemberExpr *E) { 8012 const Expr *BaseExpr = E->getBase(); 8013 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8014 // scalar. 
8015 LValue BaseLV; 8016 if (E->isArrow()) { 8017 LValueBaseInfo BaseInfo; 8018 TBAAAccessInfo TBAAInfo; 8019 Address Addr = 8020 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8021 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8022 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8023 } else { 8024 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8025 } 8026 return BaseLV; 8027 }; 8028 if (OAShE) { 8029 LowestElem = LB = 8030 Address(CGF.EmitScalarExpr(OAShE->getBase()), 8031 CGF.ConvertTypeForMem( 8032 OAShE->getBase()->getType()->getPointeeType()), 8033 CGF.getContext().getTypeAlignInChars( 8034 OAShE->getBase()->getType())); 8035 } else if (IsMemberReference) { 8036 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8037 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8038 LowestElem = CGF.EmitLValueForFieldInitialization( 8039 BaseLVal, cast<FieldDecl>(MapDecl)) 8040 .getAddress(CGF); 8041 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8042 .getAddress(CGF); 8043 } else { 8044 LowestElem = LB = 8045 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8046 .getAddress(CGF); 8047 } 8048 8049 // If this component is a pointer inside the base struct then we don't 8050 // need to create any entry for it - it will be combined with the object 8051 // it is pointing to into a single PTR_AND_OBJ entry. 8052 bool IsMemberPointerOrAddr = 8053 EncounteredME && 8054 (((IsPointer || ForDeviceAddr) && 8055 I->getAssociatedExpression() == EncounteredME) || 8056 (IsPrevMemberReference && !IsPointer) || 8057 (IsMemberReference && Next != CE && 8058 !Next->getAssociatedExpression()->getType()->isPointerType())); 8059 if (!OverlappedElements.empty() && Next == CE) { 8060 // Handle base element with the info for overlapped elements. 
8061 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8062 assert(!IsPointer && 8063 "Unexpected base element with the pointer type."); 8064 // Mark the whole struct as the struct that requires allocation on the 8065 // device. 8066 PartialStruct.LowestElem = {0, LowestElem}; 8067 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8068 I->getAssociatedExpression()->getType()); 8069 Address HB = CGF.Builder.CreateConstGEP( 8070 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8071 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8072 TypeSize.getQuantity() - 1); 8073 PartialStruct.HighestElem = { 8074 std::numeric_limits<decltype( 8075 PartialStruct.HighestElem.first)>::max(), 8076 HB}; 8077 PartialStruct.Base = BP; 8078 PartialStruct.LB = LB; 8079 assert( 8080 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8081 "Overlapped elements must be used only once for the variable."); 8082 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8083 // Emit data for non-overlapped data. 8084 OpenMPOffloadMappingFlags Flags = 8085 OMP_MAP_MEMBER_OF | 8086 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8087 /*AddPtrFlag=*/false, 8088 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8089 llvm::Value *Size = nullptr; 8090 // Do bitcopy of all non-overlapped structure elements. 
8091 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8092 Component : OverlappedElements) { 8093 Address ComponentLB = Address::invalid(); 8094 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8095 Component) { 8096 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8097 const auto *FD = dyn_cast<FieldDecl>(VD); 8098 if (FD && FD->getType()->isLValueReferenceType()) { 8099 const auto *ME = 8100 cast<MemberExpr>(MC.getAssociatedExpression()); 8101 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8102 ComponentLB = 8103 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8104 .getAddress(CGF); 8105 } else { 8106 ComponentLB = 8107 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8108 .getAddress(CGF); 8109 } 8110 Size = CGF.Builder.CreatePtrDiff( 8111 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8112 CGF.EmitCastToVoidPtr(LB.getPointer())); 8113 break; 8114 } 8115 } 8116 assert(Size && "Failed to determine structure size"); 8117 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8118 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8119 CombinedInfo.Pointers.push_back(LB.getPointer()); 8120 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8121 Size, CGF.Int64Ty, /*isSigned=*/true)); 8122 CombinedInfo.Types.push_back(Flags); 8123 CombinedInfo.Mappers.push_back(nullptr); 8124 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8125 : 1); 8126 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8127 } 8128 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8129 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8130 CombinedInfo.Pointers.push_back(LB.getPointer()); 8131 Size = CGF.Builder.CreatePtrDiff( 8132 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8133 CGF.EmitCastToVoidPtr(LB.getPointer())); 8134 CombinedInfo.Sizes.push_back( 8135 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8136 CombinedInfo.Types.push_back(Flags); 8137 CombinedInfo.Mappers.push_back(nullptr); 8138 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8139 : 1); 8140 break; 8141 } 8142 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8143 if (!IsMemberPointerOrAddr || 8144 (Next == CE && MapType != OMPC_MAP_unknown)) { 8145 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8146 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8147 CombinedInfo.Pointers.push_back(LB.getPointer()); 8148 CombinedInfo.Sizes.push_back( 8149 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8150 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8151 : 1); 8152 8153 // If Mapper is valid, the last component inherits the mapper. 8154 bool HasMapper = Mapper && Next == CE; 8155 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8156 8157 // We need to add a pointer flag for each map that comes from the 8158 // same expression except for the first one. We also need to signal 8159 // this map is the first one that relates with the current capture 8160 // (there is a set of entries for each capture). 
8161 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8162 MapType, MapModifiers, MotionModifiers, IsImplicit, 8163 !IsExpressionFirstInfo || RequiresReference || 8164 FirstPointerInComplexData || IsMemberReference, 8165 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8166 8167 if (!IsExpressionFirstInfo || IsMemberReference) { 8168 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8169 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8170 if (IsPointer || (IsMemberReference && Next != CE)) 8171 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8172 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8173 8174 if (ShouldBeMemberOf) { 8175 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8176 // should be later updated with the correct value of MEMBER_OF. 8177 Flags |= OMP_MAP_MEMBER_OF; 8178 // From now on, all subsequent PTR_AND_OBJ entries should not be 8179 // marked as MEMBER_OF. 8180 ShouldBeMemberOf = false; 8181 } 8182 } 8183 8184 CombinedInfo.Types.push_back(Flags); 8185 } 8186 8187 // If we have encountered a member expression so far, keep track of the 8188 // mapped member. If the parent is "*this", then the value declaration 8189 // is nullptr. 
8190 if (EncounteredME) { 8191 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8192 unsigned FieldIndex = FD->getFieldIndex(); 8193 8194 // Update info about the lowest and highest elements for this struct 8195 if (!PartialStruct.Base.isValid()) { 8196 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8197 if (IsFinalArraySection) { 8198 Address HB = 8199 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8200 .getAddress(CGF); 8201 PartialStruct.HighestElem = {FieldIndex, HB}; 8202 } else { 8203 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8204 } 8205 PartialStruct.Base = BP; 8206 PartialStruct.LB = BP; 8207 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8208 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8209 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8210 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8211 } 8212 } 8213 8214 // Need to emit combined struct for array sections. 8215 if (IsFinalArraySection || IsNonContiguous) 8216 PartialStruct.IsArraySection = true; 8217 8218 // If we have a final array section, we are done with this expression. 8219 if (IsFinalArraySection) 8220 break; 8221 8222 // The pointer becomes the base for the next element. 8223 if (Next != CE) 8224 BP = IsMemberReference ? LowestElem : LB; 8225 8226 IsExpressionFirstInfo = false; 8227 IsCaptureFirstInfo = false; 8228 FirstPointerInComplexData = false; 8229 IsPrevMemberReference = IsMemberReference; 8230 } else if (FirstPointerInComplexData) { 8231 QualType Ty = Components.rbegin() 8232 ->getAssociatedDeclaration() 8233 ->getType() 8234 .getNonReferenceType(); 8235 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8236 FirstPointerInComplexData = false; 8237 } 8238 } 8239 // If ran into the whole component - allocate the space for the whole 8240 // record. 
8241 if (!EncounteredME) 8242 PartialStruct.HasCompleteRecord = true; 8243 8244 if (!IsNonContiguous) 8245 return; 8246 8247 const ASTContext &Context = CGF.getContext(); 8248 8249 // For supporting stride in array section, we need to initialize the first 8250 // dimension size as 1, first offset as 0, and first count as 1 8251 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8252 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8253 MapValuesArrayTy CurStrides; 8254 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8255 uint64_t ElementTypeSize; 8256 8257 // Collect Size information for each dimension and get the element size as 8258 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8259 // should be [10, 10] and the first stride is 4 btyes. 8260 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8261 Components) { 8262 const Expr *AssocExpr = Component.getAssociatedExpression(); 8263 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8264 8265 if (!OASE) 8266 continue; 8267 8268 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8269 auto *CAT = Context.getAsConstantArrayType(Ty); 8270 auto *VAT = Context.getAsVariableArrayType(Ty); 8271 8272 // We need all the dimension size except for the last dimension. 8273 assert((VAT || CAT || &Component == &*Components.begin()) && 8274 "Should be either ConstantArray or VariableArray if not the " 8275 "first Component"); 8276 8277 // Get element size if CurStrides is empty. 
8278 if (CurStrides.empty()) { 8279 const Type *ElementType = nullptr; 8280 if (CAT) 8281 ElementType = CAT->getElementType().getTypePtr(); 8282 else if (VAT) 8283 ElementType = VAT->getElementType().getTypePtr(); 8284 else 8285 assert(&Component == &*Components.begin() && 8286 "Only expect pointer (non CAT or VAT) when this is the " 8287 "first Component"); 8288 // If ElementType is null, then it means the base is a pointer 8289 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8290 // for next iteration. 8291 if (ElementType) { 8292 // For the case that having pointer as base, we need to remove one 8293 // level of indirection. 8294 if (&Component != &*Components.begin()) 8295 ElementType = ElementType->getPointeeOrArrayElementType(); 8296 ElementTypeSize = 8297 Context.getTypeSizeInChars(ElementType).getQuantity(); 8298 CurStrides.push_back( 8299 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8300 } 8301 } 8302 // Get dimension value except for the last dimension since we don't need 8303 // it. 8304 if (DimSizes.size() < Components.size() - 1) { 8305 if (CAT) 8306 DimSizes.push_back(llvm::ConstantInt::get( 8307 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8308 else if (VAT) 8309 DimSizes.push_back(CGF.Builder.CreateIntCast( 8310 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8311 /*IsSigned=*/false)); 8312 } 8313 } 8314 8315 // Skip the dummy dimension since we have already have its information. 8316 auto *DI = DimSizes.begin() + 1; 8317 // Product of dimension. 8318 llvm::Value *DimProd = 8319 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8320 8321 // Collect info for non-contiguous. Notice that offset, count, and stride 8322 // are only meaningful for array-section, so we insert a null for anything 8323 // other than array-section. 8324 // Also, the size of offset, count, and stride are not the same as 8325 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8326 // count, and stride are the same as the number of non-contiguous 8327 // declaration in target update to/from clause. 8328 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8329 Components) { 8330 const Expr *AssocExpr = Component.getAssociatedExpression(); 8331 8332 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8333 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8334 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8335 /*isSigned=*/false); 8336 CurOffsets.push_back(Offset); 8337 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8338 CurStrides.push_back(CurStrides.back()); 8339 continue; 8340 } 8341 8342 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8343 8344 if (!OASE) 8345 continue; 8346 8347 // Offset 8348 const Expr *OffsetExpr = OASE->getLowerBound(); 8349 llvm::Value *Offset = nullptr; 8350 if (!OffsetExpr) { 8351 // If offset is absent, then we just set it to zero. 8352 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8353 } else { 8354 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8355 CGF.Int64Ty, 8356 /*isSigned=*/false); 8357 } 8358 CurOffsets.push_back(Offset); 8359 8360 // Count 8361 const Expr *CountExpr = OASE->getLength(); 8362 llvm::Value *Count = nullptr; 8363 if (!CountExpr) { 8364 // In Clang, once a high dimension is an array section, we construct all 8365 // the lower dimension as array section, however, for case like 8366 // arr[0:2][2], Clang construct the inner dimension as an array section 8367 // but it actually is not in an array section form according to spec. 8368 if (!OASE->getColonLocFirst().isValid() && 8369 !OASE->getColonLocSecond().isValid()) { 8370 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8371 } else { 8372 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8373 // When the length is absent it defaults to ⌈(size − 8374 // lower-bound)/stride⌉, where size is the size of the array 8375 // dimension. 8376 const Expr *StrideExpr = OASE->getStride(); 8377 llvm::Value *Stride = 8378 StrideExpr 8379 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8380 CGF.Int64Ty, /*isSigned=*/false) 8381 : nullptr; 8382 if (Stride) 8383 Count = CGF.Builder.CreateUDiv( 8384 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8385 else 8386 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8387 } 8388 } else { 8389 Count = CGF.EmitScalarExpr(CountExpr); 8390 } 8391 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8392 CurCounts.push_back(Count); 8393 8394 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8395 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8396 // Offset Count Stride 8397 // D0 0 1 4 (int) <- dummy dimension 8398 // D1 0 2 8 (2 * (1) * 4) 8399 // D2 1 2 20 (1 * (1 * 5) * 4) 8400 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8401 const Expr *StrideExpr = OASE->getStride(); 8402 llvm::Value *Stride = 8403 StrideExpr 8404 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8405 CGF.Int64Ty, /*isSigned=*/false) 8406 : nullptr; 8407 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8408 if (Stride) 8409 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8410 else 8411 CurStrides.push_back(DimProd); 8412 if (DI != DimSizes.end()) 8413 ++DI; 8414 } 8415 8416 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8417 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8418 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8419 } 8420 8421 /// Return the adjusted map modifiers if the declaration a capture refers to 8422 /// appears in a first-private clause. This is expected to be used only with 8423 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // Pointers get TO | PTR_AND_OBJ so the pointee is mapped and the
      // pointer itself is attached to it; non-pointers get PRIVATE | TO.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    // Default for captures that are neither first-private nor mapped lambdas:
    // copy in and out.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Return the MEMBER_OF flag encoding the given member \p Position:
  /// (Position + 1) shifted into the MEMBER_OF bit-field of the map-type
  /// flags. Position is 0-based; the +1 keeps 0 free as "no member-of".
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with the concrete
  /// \p MemberOfFlag. PTR_AND_OBJ entries that do not carry the placeholder
  /// are deliberately left untouched (they are not struct members).
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the fields of \p RD — including those of its non-empty (virtual
  /// and non-virtual) bases, recursively — into \p Layout, ordered by their
  /// position in the LLVM struct layout. \p AsBase selects the base-subobject
  /// LLVM type (which may omit tail padding) instead of the complete type.
  /// Bitfields and zero-size fields are skipped.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // RecordLayout[i] holds whichever base class or field occupies LLVM
    // struct element i; slots left null (padding, bitfield storage) are
    // skipped in the final pass below.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot already claimed by a non-virtual base wins.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected entries in layout order, recursing into bases so
    // that only FieldDecls end up in Layout.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
8551 auto &&InfoGen = 8552 [&Info, &SkipVarSet]( 8553 const ValueDecl *D, MapKind Kind, 8554 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8555 OpenMPMapClauseKind MapType, 8556 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8557 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8558 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8559 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8560 if (SkipVarSet.contains(D)) 8561 return; 8562 auto It = Info.find(D); 8563 if (It == Info.end()) 8564 It = Info 8565 .insert(std::make_pair( 8566 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8567 .first; 8568 It->second[Kind].emplace_back( 8569 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8570 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8571 }; 8572 8573 for (const auto *Cl : Clauses) { 8574 const auto *C = dyn_cast<OMPMapClause>(Cl); 8575 if (!C) 8576 continue; 8577 MapKind Kind = Other; 8578 if (llvm::is_contained(C->getMapTypeModifiers(), 8579 OMPC_MAP_MODIFIER_present)) 8580 Kind = Present; 8581 else if (C->getMapType() == OMPC_MAP_alloc) 8582 Kind = Allocs; 8583 const auto *EI = C->getVarRefs().begin(); 8584 for (const auto L : C->component_lists()) { 8585 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8586 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8587 C->getMapTypeModifiers(), llvm::None, 8588 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8589 E); 8590 ++EI; 8591 } 8592 } 8593 for (const auto *Cl : Clauses) { 8594 const auto *C = dyn_cast<OMPToClause>(Cl); 8595 if (!C) 8596 continue; 8597 MapKind Kind = Other; 8598 if (llvm::is_contained(C->getMotionModifiers(), 8599 OMPC_MOTION_MODIFIER_present)) 8600 Kind = Present; 8601 const auto *EI = C->getVarRefs().begin(); 8602 for (const auto L : C->component_lists()) { 8603 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8604 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8605 C->isImplicit(), std::get<2>(L), *EI); 8606 ++EI; 8607 } 8608 } 8609 for (const auto *Cl : Clauses) { 8610 const auto *C = dyn_cast<OMPFromClause>(Cl); 8611 if (!C) 8612 continue; 8613 MapKind Kind = Other; 8614 if (llvm::is_contained(C->getMotionModifiers(), 8615 OMPC_MOTION_MODIFIER_present)) 8616 Kind = Present; 8617 const auto *EI = C->getVarRefs().begin(); 8618 for (const auto L : C->component_lists()) { 8619 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8620 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8621 C->isImplicit(), std::get<2>(L), *EI); 8622 ++EI; 8623 } 8624 } 8625 8626 // Look at the use_device_ptr clause information and mark the existing map 8627 // entries as such. If there is no map information for an entry in the 8628 // use_device_ptr list, we create one with map type 'alloc' and zero size 8629 // section. It is the user fault if that was not mapped before. If there is 8630 // no map information and the pointer is a struct member, then we defer the 8631 // emission of that entry until the whole struct has been processed. 
8632 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8633 SmallVector<DeferredDevicePtrEntryTy, 4>> 8634 DeferredInfo; 8635 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8636 8637 for (const auto *Cl : Clauses) { 8638 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8639 if (!C) 8640 continue; 8641 for (const auto L : C->component_lists()) { 8642 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8643 std::get<1>(L); 8644 assert(!Components.empty() && 8645 "Not expecting empty list of components!"); 8646 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8647 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8648 const Expr *IE = Components.back().getAssociatedExpression(); 8649 // If the first component is a member expression, we have to look into 8650 // 'this', which maps to null in the map of map information. Otherwise 8651 // look directly for the information. 8652 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8653 8654 // We potentially have map information for this declaration already. 8655 // Look for the first set of components that refer to it. 8656 if (It != Info.end()) { 8657 bool Found = false; 8658 for (auto &Data : It->second) { 8659 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8660 return MI.Components.back().getAssociatedDeclaration() == VD; 8661 }); 8662 // If we found a map entry, signal that the pointer has to be 8663 // returned and move on to the next declaration. Exclude cases where 8664 // the base pointer is mapped as array subscript, array section or 8665 // array shaping. The base address is passed as a pointer to base in 8666 // this case and cannot be used as a base for use_device_ptr list 8667 // item. 
8668 if (CI != Data.end()) { 8669 auto PrevCI = std::next(CI->Components.rbegin()); 8670 const auto *VarD = dyn_cast<VarDecl>(VD); 8671 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8672 isa<MemberExpr>(IE) || 8673 !VD->getType().getNonReferenceType()->isPointerType() || 8674 PrevCI == CI->Components.rend() || 8675 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8676 VarD->hasLocalStorage()) { 8677 CI->ReturnDevicePointer = true; 8678 Found = true; 8679 break; 8680 } 8681 } 8682 } 8683 if (Found) 8684 continue; 8685 } 8686 8687 // We didn't find any match in our map information - generate a zero 8688 // size array section - if the pointer is a struct member we defer this 8689 // action until the whole struct has been processed. 8690 if (isa<MemberExpr>(IE)) { 8691 // Insert the pointer into Info to be processed by 8692 // generateInfoForComponentList. Because it is a member pointer 8693 // without a pointee, no entry will be generated for it, therefore 8694 // we need to generate one after the whole struct has been processed. 8695 // Nonetheless, generateInfoForComponentList must be called to take 8696 // the pointer into account for the calculation of the range of the 8697 // partial struct. 
8698 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8699 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8700 nullptr); 8701 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8702 } else { 8703 llvm::Value *Ptr = 8704 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8705 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8706 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8707 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8708 UseDevicePtrCombinedInfo.Sizes.push_back( 8709 llvm::Constant::getNullValue(CGF.Int64Ty)); 8710 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8711 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8712 } 8713 } 8714 } 8715 8716 // Look at the use_device_addr clause information and mark the existing map 8717 // entries as such. If there is no map information for an entry in the 8718 // use_device_addr list, we create one with map type 'alloc' and zero size 8719 // section. It is the user fault if that was not mapped before. If there is 8720 // no map information and the pointer is a struct member, then we defer the 8721 // emission of that entry until the whole struct has been processed. 8722 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8723 for (const auto *Cl : Clauses) { 8724 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8725 if (!C) 8726 continue; 8727 for (const auto L : C->component_lists()) { 8728 assert(!std::get<1>(L).empty() && 8729 "Not expecting empty list of components!"); 8730 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8731 if (!Processed.insert(VD).second) 8732 continue; 8733 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8734 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8735 // If the first component is a member expression, we have to look into 8736 // 'this', which maps to null in the map of map information. 
Otherwise 8737 // look directly for the information. 8738 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8739 8740 // We potentially have map information for this declaration already. 8741 // Look for the first set of components that refer to it. 8742 if (It != Info.end()) { 8743 bool Found = false; 8744 for (auto &Data : It->second) { 8745 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8746 return MI.Components.back().getAssociatedDeclaration() == VD; 8747 }); 8748 // If we found a map entry, signal that the pointer has to be 8749 // returned and move on to the next declaration. 8750 if (CI != Data.end()) { 8751 CI->ReturnDevicePointer = true; 8752 Found = true; 8753 break; 8754 } 8755 } 8756 if (Found) 8757 continue; 8758 } 8759 8760 // We didn't find any match in our map information - generate a zero 8761 // size array section - if the pointer is a struct member we defer this 8762 // action until the whole struct has been processed. 8763 if (isa<MemberExpr>(IE)) { 8764 // Insert the pointer into Info to be processed by 8765 // generateInfoForComponentList. Because it is a member pointer 8766 // without a pointee, no entry will be generated for it, therefore 8767 // we need to generate one after the whole struct has been processed. 8768 // Nonetheless, generateInfoForComponentList must be called to take 8769 // the pointer into account for the calculation of the range of the 8770 // partial struct. 
8771 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8772 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8773 nullptr, nullptr, /*ForDeviceAddr=*/true); 8774 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8775 } else { 8776 llvm::Value *Ptr; 8777 if (IE->isGLValue()) 8778 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8779 else 8780 Ptr = CGF.EmitScalarExpr(IE); 8781 CombinedInfo.Exprs.push_back(VD); 8782 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8783 CombinedInfo.Pointers.push_back(Ptr); 8784 CombinedInfo.Sizes.push_back( 8785 llvm::Constant::getNullValue(CGF.Int64Ty)); 8786 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8787 CombinedInfo.Mappers.push_back(nullptr); 8788 } 8789 } 8790 } 8791 8792 for (const auto &Data : Info) { 8793 StructRangeInfoTy PartialStruct; 8794 // Temporary generated information. 8795 MapCombinedInfoTy CurInfo; 8796 const Decl *D = Data.first; 8797 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8798 for (const auto &M : Data.second) { 8799 for (const MapInfo &L : M) { 8800 assert(!L.Components.empty() && 8801 "Not expecting declaration with no component lists."); 8802 8803 // Remember the current base pointer index. 8804 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8805 CurInfo.NonContigInfo.IsNonContiguous = 8806 L.Components.back().isNonContiguous(); 8807 generateInfoForComponentList( 8808 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8809 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8810 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8811 8812 // If this entry relates with a device pointer, set the relevant 8813 // declaration and add the 'return pointer' flag. 
8814 if (L.ReturnDevicePointer) { 8815 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8816 "Unexpected number of mapped base pointers."); 8817 8818 const ValueDecl *RelevantVD = 8819 L.Components.back().getAssociatedDeclaration(); 8820 assert(RelevantVD && 8821 "No relevant declaration related with device pointer??"); 8822 8823 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8824 RelevantVD); 8825 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8826 } 8827 } 8828 } 8829 8830 // Append any pending zero-length pointers which are struct members and 8831 // used with use_device_ptr or use_device_addr. 8832 auto CI = DeferredInfo.find(Data.first); 8833 if (CI != DeferredInfo.end()) { 8834 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8835 llvm::Value *BasePtr; 8836 llvm::Value *Ptr; 8837 if (L.ForDeviceAddr) { 8838 if (L.IE->isGLValue()) 8839 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8840 else 8841 Ptr = this->CGF.EmitScalarExpr(L.IE); 8842 BasePtr = Ptr; 8843 // Entry is RETURN_PARAM. Also, set the placeholder value 8844 // MEMBER_OF=FFFF so that the entry is later updated with the 8845 // correct value of MEMBER_OF. 8846 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8847 } else { 8848 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8849 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8850 L.IE->getExprLoc()); 8851 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8852 // placeholder value MEMBER_OF=FFFF so that the entry is later 8853 // updated with the correct value of MEMBER_OF. 
8854 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8855 OMP_MAP_MEMBER_OF); 8856 } 8857 CurInfo.Exprs.push_back(L.VD); 8858 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8859 CurInfo.Pointers.push_back(Ptr); 8860 CurInfo.Sizes.push_back( 8861 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8862 CurInfo.Mappers.push_back(nullptr); 8863 } 8864 } 8865 // If there is an entry in PartialStruct it means we have a struct with 8866 // individual members mapped. Emit an extra combined entry. 8867 if (PartialStruct.Base.isValid()) { 8868 CurInfo.NonContigInfo.Dims.push_back(0); 8869 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8870 } 8871 8872 // We need to append the results of this capture to what we already 8873 // have. 8874 CombinedInfo.append(CurInfo); 8875 } 8876 // Append data for use_device_ptr clauses. 8877 CombinedInfo.append(UseDevicePtrCombinedInfo); 8878 } 8879 8880 public: 8881 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8882 : CurDir(&Dir), CGF(CGF) { 8883 // Extract firstprivate clause information. 8884 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8885 for (const auto *D : C->varlists()) 8886 FirstPrivateDecls.try_emplace( 8887 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8888 // Extract implicit firstprivates from uses_allocators clauses. 
    // Collect implicit firstprivates introduced by uses_allocators clauses:
    // both the allocator-traits variable and any user-declared allocator
    // variable must be treated as implicitly firstprivate on the region.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information: remember lambdas mapped with 'map(to:)' so
    // their capture fields are handled by generateDefaultMapInfo rather than
    // by the per-capture path.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a member of a struct nor an array
    // section needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds collapse to the record base; the
    // size below then degenerates to the size of one element at LB.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // This overload is only valid when the handler was built for an
    // executable directive (not a declare mapper).
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    // Mirror of generateAllInfo for the declare-mapper constructor.
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closure objects are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map a captured 'this' (if any) as a pointer-and-object member entry.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: load its value; the pointee is mapped with zero
        // size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // The exact flag combination below identifies entries emitted by
      // generateInfoForLambdaCaptures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda base.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Gather every map-clause component list that refers to this declaration,
    // together with its map type, modifiers, mapper and variable reference.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort so that 'present'-modified and 'alloc' maps are processed
    // first; ties keep the original clause order.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list; walk both component lists from the
      // back (i.e. from the base of the expression) until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type to obtain
      // the field layout used for ordering.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists with overlaps were already emitted above; only handle the rest.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // Firstprivate pointers are dereferenced so the pointee is mapped.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the per-dimension descriptor arrays used by the runtime for
/// non-contiguous data transfers and store a pointer to each descriptor
/// array into the corresponding slot of the offload pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill in the descriptor for each dimension, innermost dimension first
    // (hence the reversed index).
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9499 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9500 if (!E) 9501 return nullptr; 9502 9503 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9504 if (const MemberExpr *ME = 9505 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9506 return ME->getMemberDecl(); 9507 return nullptr; 9508 } 9509 9510 /// Emit a string constant containing the names of the values mapped to the 9511 /// offloading runtime library. 9512 llvm::Constant * 9513 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9514 MappableExprsHandler::MappingExprInfo &MapExprs) { 9515 9516 uint32_t SrcLocStrSize; 9517 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9518 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9519 9520 SourceLocation Loc; 9521 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9522 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9523 Loc = VD->getLocation(); 9524 else 9525 Loc = MapExprs.getMapExpr()->getExprLoc(); 9526 } else { 9527 Loc = MapExprs.getMapDecl()->getLocation(); 9528 } 9529 9530 std::string ExprName; 9531 if (MapExprs.getMapExpr()) { 9532 PrintingPolicy P(CGF.getContext().getLangOpts()); 9533 llvm::raw_string_ostream OS(ExprName); 9534 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9535 OS.flush(); 9536 } else { 9537 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9538 } 9539 9540 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9541 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9542 PLoc.getLine(), PLoc.getColumn(), 9543 SrcLocStrSize); 9544 } 9545 9546 /// Emit the arrays used to pass the captures and map information to the 9547 /// offloading runtime library. If there is no map or capture information, 9548 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the base-pointer, pointer and mapper arrays; they
    // are filled element-by-element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time-known sizes; RuntimeSizes marks the
    // entries whose size must instead be stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For a non-contiguous entry, the "size" slot carries the number of
          // dimensions rather than a byte count.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is runtime-evaluated: a plain stack array is enough; the
      // loop below fills each slot.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least some sizes are constants: materialize them in a private
      // constant global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: copy the constant global into a stack
        // buffer first, then the loop below overwrites the runtime slots.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        // NOTE(review): the source alignment is taken from the *unsigned*
        // 64-bit type while the array uses the signed one — presumably the
        // two have identical alignment; confirm.
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes constant: the global itself can be passed directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit the second maptypes array if stripping PRESENT actually
      // changed anything.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each capture's base pointer, pointer, (runtime) size and mapper
    // into the corresponding array slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr/addr capture was stored so the body
      // can read the translated address back.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Runtime-evaluated sizes overwrite their (zero-initialized or copied)
      // slot in the sizes array.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9750 static void emitOffloadingArraysArgument( 9751 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9752 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9753 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9754 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9755 const ArgumentsOptions &Options = ArgumentsOptions()) { 9756 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9757 "expected region end call to runtime only when end call is separate"); 9758 CodeGenModule &CGM = CGF.CGM; 9759 if (Info.NumberOfPtrs) { 9760 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9761 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9762 Info.BasePointersArray, 9763 /*Idx0=*/0, /*Idx1=*/0); 9764 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9765 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9766 Info.PointersArray, 9767 /*Idx0=*/0, 9768 /*Idx1=*/0); 9769 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9770 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9771 /*Idx0=*/0, /*Idx1=*/0); 9772 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9773 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9774 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9775 : Info.MapTypesArray, 9776 /*Idx0=*/0, 9777 /*Idx1=*/0); 9778 9779 // Only emit the mapper information arrays if debug information is 9780 // requested. 
9781 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9782 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9783 else 9784 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9785 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9786 Info.MapNamesArray, 9787 /*Idx0=*/0, 9788 /*Idx1=*/0); 9789 // If there is no user-defined mapper, set the mapper array to nullptr to 9790 // avoid an unnecessary data privatization 9791 if (!Info.HasMapper) 9792 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9793 else 9794 MappersArrayArg = 9795 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9796 } else { 9797 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9798 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9799 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9800 MapTypesArrayArg = 9801 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9802 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9803 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9804 } 9805 } 9806 9807 /// Check for inner distribute directive. 
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look at the single statement nested inside D's innermost captured region.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly, or 'teams' wrapping a
      // 'distribute' one level deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot legally contain a nested 'distribute'.
      return nullptr;
    // All remaining kinds are either combined target+distribute forms (the
    // distribute is part of D itself) or non-target directives; callers must
    // not pass them here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name embeds the mangled mapped type so distinct mappers for
  // distinct types get distinct symbols.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the loop backedge comes from; it is updated below
  // because the per-component codegen introduces new blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Pre-shift the count into the MEMBER_OF bit-field position so it can be
  // added to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Component names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; the tofrom case reaches EndBB
    // straight from ToElseBB with MemberMapType untouched.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function, and remember which function requested it so
  // the deferred emission bookkeeping stays correct.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init also triggers for a pointer-dereference mapping: base != begin
    // combined with the PTR_AND_OBJ bit.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization must not run when DELETE is requested...
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // ...whereas deletion runs only when DELETE is requested.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the cached mapper function for \p D, emitting it on first use.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call informing the offloading runtime of the trip count of the
/// (possibly nested) distribute loop associated with target directive \p D,
/// using \p SizeEmitter to compute the number of iterations.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
10283 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10284 TD = getNestedDistributeDirective(CGM.getContext(), D); 10285 if (!TD) 10286 return; 10287 const auto *LD = cast<OMPLoopDirective>(TD); 10288 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10289 PrePostActionTy &) { 10290 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10291 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10292 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10293 CGF.EmitRuntimeCall( 10294 OMPBuilder.getOrCreateRuntimeFunction( 10295 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10296 Args); 10297 } 10298 }; 10299 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10300 } 10301 10302 void CGOpenMPRuntime::emitTargetCall( 10303 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10304 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10305 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10306 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10307 const OMPLoopDirective &D)> 10308 SizeEmitter) { 10309 if (!CGF.HaveInsertPoint()) 10310 return; 10311 10312 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10313 CGM.getLangOpts().OpenMPOffloadMandatory; 10314 10315 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10316 10317 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10318 D.hasClausesOfKind<OMPNowaitClause>(); 10319 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10320 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10321 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10322 PrePostActionTy &) { 10323 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10324 }; 10325 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10326 10327 CodeGenFunction::OMPTargetDataInfo InputInfo; 10328 llvm::Value *MapTypesArray = nullptr; 10329 
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      // No host version exists under mandatory offloading; trap instead.
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Re-capture inside the task region so the fallback sees task-local
        // copies.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call; non-zero is treated as
    // failure below and triggers the host fallback.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, record fields and captured values in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the array addresses for ThenGen (which may run inside a task).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user did not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

// Recursively walk statement \p S looking for target regions and emit their
// device-side functions; \p ParentName is the mangled name used to build the
// unique offload entry.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each directive
    // kind that can be a device entry point; everything else is unreachable.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directives: keep scanning their associated statement.
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

// Returns true when \p VD must not be emitted for the current compilation
// side (host vs. device) according to its declare target device_type.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

// Returns true when the normal emission of \p GD should be skipped. On the
// device side this also scans the function body for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

// Returns true when the normal emission of global variable \p GD should be
// skipped (deferred or suppressed by declare target rules).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // Defer emission; handled later in emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

// Record a declare target global variable in the offload entries table so the
// runtime can associate the host and device copies of \p VD.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register without offloading targets or device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size is unknown here, register as zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant holding the variable's address and mark it
        // compiler-used so it survives optimization.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // Link (or to + unified memory): register the indirection variable
      // instead of the variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

// Dispatch target emission for functions/declare-reduction decls vs. globals.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

// Emit the globals whose emission was deferred by emitTargetGlobalVariable().
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Only the indirection variable is needed; result intentionally unused.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

// Base implementation: nothing to adjust; overridden by device runtimes.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

// Record the effects of a 'requires' directive: unified shared memory and the
// default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

// Accessor for the ordering recorded by processRequiresDirective().
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

// Returns true if \p VD carries an 'omp allocate' attribute with a predefined
// allocator and, if so, sets \p AS to the address space to use for it.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // All predefined allocators map to the default address space here;
    // device runtimes may override this behavior.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

// Accessor for the flag recorded by processRequiresDirective().
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

// RAII: temporarily disable auto declare target marking during device codegen,
// saving the previous ShouldMarkAsGlobal state.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

// Restore the ShouldMarkAsGlobal state saved by the constructor.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

// Returns true if \p GD has already been emitted (or must not be emitted now)
// for the device; records first-time decls in AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Consider it emitted only if a non-declaration definition exists in
      // the module already.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert() returns false in .second if D was already present.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

// Emit (and return) the constructor-like function that registers the module's
// 'requires' flags with the runtime; returns nullptr when not needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
11097 assert((HasEmittedTargetRegion || 11098 HasEmittedDeclareTargetRegion || 11099 !OffloadEntriesInfoManager.empty()) && 11100 "Target or declare target region expected."); 11101 if (HasRequiresUnifiedSharedMemory) 11102 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11103 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11104 CGM.getModule(), OMPRTL___tgt_register_requires), 11105 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11106 CGF.FinishFunction(); 11107 } 11108 return RequiresRegFn; 11109 } 11110 11111 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11112 const OMPExecutableDirective &D, 11113 SourceLocation Loc, 11114 llvm::Function *OutlinedFn, 11115 ArrayRef<llvm::Value *> CapturedVars) { 11116 if (!CGF.HaveInsertPoint()) 11117 return; 11118 11119 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11120 CodeGenFunction::RunCleanupsScope Scope(CGF); 11121 11122 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11123 llvm::Value *Args[] = { 11124 RTLoc, 11125 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11126 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11127 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11128 RealArgs.append(std::begin(Args), std::end(Args)); 11129 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11130 11131 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11132 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11133 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11134 } 11135 11136 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11137 const Expr *NumTeams, 11138 const Expr *ThreadLimit, 11139 SourceLocation Loc) { 11140 if (!CGF.HaveInsertPoint()) 11141 return; 11142 11143 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11144 11145 llvm::Value *NumTeamsVal = 11146 NumTeams 11147 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11148 CGF.CGM.Int32Ty, /* isSigned = */ true) 11149 : CGF.Builder.getInt32(0); 11150 11151 llvm::Value *ThreadLimitVal = 11152 ThreadLimit 11153 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11154 CGF.CGM.Int32Ty, /* isSigned = */ true) 11155 : CGF.Builder.getInt32(0); 11156 11157 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11158 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11159 ThreadLimitVal}; 11160 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11161 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11162 PushNumTeamsArgs); 11163 } 11164 11165 void CGOpenMPRuntime::emitTargetDataCalls( 11166 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11167 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11168 if (!CGF.HaveInsertPoint()) 11169 return; 11170 11171 // Action used to replace the default codegen action and turn privatization 11172 // off. 11173 PrePostActionTy NoPrivAction; 11174 11175 // Generate the code for the opening of the data environment. Capture all the 11176 // arguments of the runtime call by reference because they are used in the 11177 // closing of the region. 11178 auto &&BeginThenGen = [this, &D, Device, &Info, 11179 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11180 // Fill up the arrays with all the mapped variables. 11181 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11182 11183 // Get map clause information. 11184 MappableExprsHandler MEHandler(D, CGF); 11185 MEHandler.generateAllInfo(CombinedInfo); 11186 11187 // Fill up the arrays and create the arguments. 
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    // Materialize pointers to the offloading arrays for the runtime call.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

// Emits the runtime call for a standalone 'target enter data', 'target exit
// data' or 'target update' directive, possibly wrapped in an outer task when
// 'depend'/'nowait' clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are enumerated explicitly (instead of relying
    // only on 'default') so that adding a new OpenMP directive without
    // updating this switch triggers a -Wswitch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend'/'nowait' clauses require the call to run inside an outer task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; Vector unless a clause says otherwise.
  ParamKindTy Kind = Vector;
  // Linear step, or the position of the stride parameter for
  // LinearWithVarStride.
  llvm::APSInt StrideOrArg;
  // Alignment from the 'aligned' clause (zero when absent).
  llvm::APSInt Alignment;
};
} // namespace

/// Computes the size in bits of the "characteristic data type" (CDT) used to
/// derive the vector length for a 'declare simd' function without 'simdlen'.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    // Rule a): non-void return type is the CDT.
    CDT = RetType;
  } else {
    // Rule b): first Vector-classified parameter. For methods, position 0 is
    // the implicit 'this', modeled as a pointer to the parent record.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Rules c) and d): fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Attaches x86 "_ZGV..." vector-variant mangled names as function attributes
/// for each ISA (SSE/AVX/AVX2/AVX512) and mask combination, following the
/// Intel vector function ABI mangling.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  // 'N' = unmasked variant, 'M' = masked variant; which ones are emitted
  // depends on the [not]inbranch clause.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No user simdlen: derive VLEN from the characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11648 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11649 QT = QT.getCanonicalType(); 11650 11651 if (QT->isVoidType()) 11652 return false; 11653 11654 if (Kind == ParamKindTy::Uniform) 11655 return false; 11656 11657 if (Kind == ParamKindTy::Linear) 11658 return false; 11659 11660 // TODO: Handle linear references with modifiers 11661 11662 if (Kind == ParamKindTy::LinearWithVarStride) 11663 return false; 11664 11665 return true; 11666 } 11667 11668 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11669 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11670 QT = QT.getCanonicalType(); 11671 unsigned Size = C.getTypeSize(QT); 11672 11673 // Only scalars and complex within 16 bytes wide set PVB to true. 11674 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11675 return false; 11676 11677 if (QT->isFloatingType()) 11678 return true; 11679 11680 if (QT->isIntegerType()) 11681 return true; 11682 11683 if (QT->isPointerType()) 11684 return true; 11685 11686 // TODO: Add support for complex types (section 3.1.2, item 2). 11687 11688 return false; 11689 } 11690 11691 /// Computes the lane size (LS) of a return type or of an input parameter, 11692 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11693 /// TODO: Add support for references, section 3.2.1, item 1. 11694 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11695 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11696 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11697 if (getAArch64PBV(PTy, C)) 11698 return C.getTypeSize(PTy); 11699 } 11700 if (getAArch64PBV(QT, C)) 11701 return C.getTypeSize(QT); 11702 11703 return C.getTypeSize(C.getUIntPtrType()); 11704 } 11705 11706 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11707 // signature of the scalar function, as defined in 3.2.2 of the 11708 // AAVFABI. 
11709 static std::tuple<unsigned, unsigned, bool> 11710 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11711 QualType RetType = FD->getReturnType().getCanonicalType(); 11712 11713 ASTContext &C = FD->getASTContext(); 11714 11715 bool OutputBecomesInput = false; 11716 11717 llvm::SmallVector<unsigned, 8> Sizes; 11718 if (!RetType->isVoidType()) { 11719 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11720 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11721 OutputBecomesInput = true; 11722 } 11723 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11724 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11725 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11726 } 11727 11728 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11729 // The LS of a function parameter / return value can only be a power 11730 // of 2, starting from 8 bits, up to 128. 11731 assert(llvm::all_of(Sizes, 11732 [](unsigned Size) { 11733 return Size == 8 || Size == 16 || Size == 32 || 11734 Size == 64 || Size == 128; 11735 }) && 11736 "Invalid size"); 11737 11738 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11739 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11740 OutputBecomesInput); 11741 } 11742 11743 /// Mangle the parameter part of the vector function name according to 11744 /// their OpenMP classification. The mangling function is defined in 11745 /// section 3.5 of the AAVFABI. 11746 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11747 SmallString<256> Buffer; 11748 llvm::raw_svector_ostream Out(Buffer); 11749 for (const auto &ParamAttr : ParamAttrs) { 11750 switch (ParamAttr.Kind) { 11751 case LinearWithVarStride: 11752 Out << "ls" << ParamAttr.StrideOrArg; 11753 break; 11754 case Linear: 11755 Out << 'l'; 11756 // Don't print the step value if it is not present or if it is 11757 // equal to 1. 
11758 if (ParamAttr.StrideOrArg != 1) 11759 Out << ParamAttr.StrideOrArg; 11760 break; 11761 case Uniform: 11762 Out << 'u'; 11763 break; 11764 case Vector: 11765 Out << 'v'; 11766 break; 11767 } 11768 11769 if (!!ParamAttr.Alignment) 11770 Out << 'a' << ParamAttr.Alignment; 11771 } 11772 11773 return std::string(Out.str()); 11774 } 11775 11776 // Function used to add the attribute. The parameter `VLEN` is 11777 // templated to allow the use of "x" when targeting scalable functions 11778 // for SVE. 11779 template <typename T> 11780 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11781 char ISA, StringRef ParSeq, 11782 StringRef MangledName, bool OutputBecomesInput, 11783 llvm::Function *Fn) { 11784 SmallString<256> Buffer; 11785 llvm::raw_svector_ostream Out(Buffer); 11786 Out << Prefix << ISA << LMask << VLEN; 11787 if (OutputBecomesInput) 11788 Out << "v"; 11789 Out << ParSeq << "_" << MangledName; 11790 Fn->addFnAttr(Out.str()); 11791 } 11792 11793 // Helper function to generate the Advanced SIMD names depending on 11794 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // For each NDS, emit the vector lengths that fit a 64-bit and a 128-bit
  // Advanced SIMD register (section 3.3.1 of the AAVFABI).
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
11918 switch (State) { 11919 case OMPDeclareSimdDeclAttr::BS_Undefined: 11920 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11921 OutputBecomesInput, Fn); 11922 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11923 OutputBecomesInput, Fn); 11924 break; 11925 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11926 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11927 OutputBecomesInput, Fn); 11928 break; 11929 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11930 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11931 OutputBecomesInput, Fn); 11932 break; 11933 } 11934 } 11935 } 11936 } 11937 11938 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11939 llvm::Function *Fn) { 11940 ASTContext &C = CGM.getContext(); 11941 FD = FD->getMostRecentDecl(); 11942 while (FD) { 11943 // Map params to their positions in function decl. 11944 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11945 if (isa<CXXMethodDecl>(FD)) 11946 ParamPositions.try_emplace(FD, 0); 11947 unsigned ParamPos = ParamPositions.size(); 11948 for (const ParmVarDecl *P : FD->parameters()) { 11949 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11950 ++ParamPos; 11951 } 11952 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11953 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11954 // Mark uniform parameters. 11955 for (const Expr *E : Attr->uniforms()) { 11956 E = E->IgnoreParenImpCasts(); 11957 unsigned Pos; 11958 if (isa<CXXThisExpr>(E)) { 11959 Pos = ParamPositions[FD]; 11960 } else { 11961 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11962 ->getCanonicalDecl(); 11963 auto It = ParamPositions.find(PVD); 11964 assert(It != ParamPositions.end() && "Function parameter not found"); 11965 Pos = It->second; 11966 } 11967 ParamAttrs[Pos].Kind = Uniform; 11968 } 11969 // Get alignment info. 
11970 auto *NI = Attr->alignments_begin(); 11971 for (const Expr *E : Attr->aligneds()) { 11972 E = E->IgnoreParenImpCasts(); 11973 unsigned Pos; 11974 QualType ParmTy; 11975 if (isa<CXXThisExpr>(E)) { 11976 Pos = ParamPositions[FD]; 11977 ParmTy = E->getType(); 11978 } else { 11979 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11980 ->getCanonicalDecl(); 11981 auto It = ParamPositions.find(PVD); 11982 assert(It != ParamPositions.end() && "Function parameter not found"); 11983 Pos = It->second; 11984 ParmTy = PVD->getType(); 11985 } 11986 ParamAttrs[Pos].Alignment = 11987 (*NI) 11988 ? (*NI)->EvaluateKnownConstInt(C) 11989 : llvm::APSInt::getUnsigned( 11990 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11991 .getQuantity()); 11992 ++NI; 11993 } 11994 // Mark linear parameters. 11995 auto *SI = Attr->steps_begin(); 11996 for (const Expr *E : Attr->linears()) { 11997 E = E->IgnoreParenImpCasts(); 11998 unsigned Pos; 11999 // Rescaling factor needed to compute the linear parameter 12000 // value in the mangled name. 12001 unsigned PtrRescalingFactor = 1; 12002 if (isa<CXXThisExpr>(E)) { 12003 Pos = ParamPositions[FD]; 12004 } else { 12005 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12006 ->getCanonicalDecl(); 12007 auto It = ParamPositions.find(PVD); 12008 assert(It != ParamPositions.end() && "Function parameter not found"); 12009 Pos = It->second; 12010 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12011 PtrRescalingFactor = CGM.getContext() 12012 .getTypeSizeInChars(P->getPointeeType()) 12013 .getQuantity(); 12014 } 12015 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12016 ParamAttr.Kind = Linear; 12017 // Assuming a stride of 1, for `linear` without modifiers. 
12018 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12019 if (*SI) { 12020 Expr::EvalResult Result; 12021 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12022 if (const auto *DRE = 12023 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12024 if (const auto *StridePVD = 12025 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 12026 ParamAttr.Kind = LinearWithVarStride; 12027 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 12028 assert(It != ParamPositions.end() && 12029 "Function parameter not found"); 12030 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 12031 } 12032 } 12033 } else { 12034 ParamAttr.StrideOrArg = Result.Val.getInt(); 12035 } 12036 } 12037 // If we are using a linear clause on a pointer, we need to 12038 // rescale the value of linear_step with the byte size of the 12039 // pointee type. 12040 if (Linear == ParamAttr.Kind) 12041 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12042 ++SI; 12043 } 12044 llvm::APSInt VLENVal; 12045 SourceLocation ExprLoc; 12046 const Expr *VLENExpr = Attr->getSimdlen(); 12047 if (VLENExpr) { 12048 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12049 ExprLoc = VLENExpr->getExprLoc(); 12050 } 12051 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12052 if (CGM.getTriple().isX86()) { 12053 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12054 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12055 unsigned VLEN = VLENVal.getExtValue(); 12056 StringRef MangledName = Fn->getName(); 12057 if (CGM.getTarget().hasFeature("sve")) 12058 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12059 MangledName, 's', 128, Fn, ExprLoc); 12060 if (CGM.getTarget().hasFeature("neon")) 12061 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12062 MangledName, 'n', 128, Fn, ExprLoc); 12063 } 12064 } 12065 FD = FD->getPreviousDecl(); 12066 } 12067 } 12068 12069 namespace { 12070 /// Cleanup 
/// EH-stack cleanup that emits the stored doacross finalization runtime call
/// (location, gtid) when the region is exited; pushed as NormalAndEHCleanup
/// by emitDoacrossInit below, so it fires on both normal and EH paths.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insert point means this path is unreachable; nothing to emit.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the prologue of a doacross loop nest: materializes a kmp_dim array
/// (lower/upper/stride per loop, as kmp_int64) from \p NumIterations, calls
/// __kmpc_doacross_init, and registers a cleanup that calls
/// __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (or reuse the cached) kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-init the dims array; only 'upper' and 'stride' are stored below, so
  // 'lower' stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for region exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emits a doacross post/wait runtime call for an ordered 'depend' clause:
/// depend(source) posts the current iteration vector, depend(sink) waits on
/// one.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  // Materialize the iteration vector the runtime expects: one kmp_int64 slot
  // per loop in the nest, converted from each clause's loop-data expression.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // depend(source) -> __kmpc_doacross_post; depend(sink) -> *_wait.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee with an artificial debug location, using the
/// nounwind fast path when the callee is a function known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Default lowering of a call to an outlined OpenMP region function; simply
/// forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Records that the body of a 'declare target' function is being emitted.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Host-side mapping of a target parameter to an address: the native
/// parameter's own address is used; \p TargetParam is ignored here.
/// NOTE(review): presumably overridden by device-specific runtimes where the
/// two parameters differ — confirm against the class declaration.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Returns the address of local variable \p VD: routes through the recorded
/// untied-task addresses when the current function is an untied task body,
/// and through __kmpc_alloc/__kmpc_aligned_alloc (with a matching
/// __kmpc_free cleanup) for variables carrying an 'omp allocate' attribute.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If this function has untied-task local storage registered, look up the
  // (address, real address) pair recorded for this variable.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is a runtime value.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    // With an explicit 'align' modifier call __kmpc_aligned_alloc, otherwise
    // __kmpc_alloc.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Call the stored runtime function (the free entry point) with
        // (gtid, ptr, allocator).
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's "real" address when one was recorded.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD has local storage registered for the untied task
/// currently being emitted in \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII: pushes the set of decls named in 'nontemporal' clauses of \p S onto
/// the runtime's stack for the duration of the directive's codegen.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise it must be a member of the current class ('this->x').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
12395 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12396 CodeGenFunction &CGF, 12397 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12398 std::pair<Address, Address>> &LocalVars) 12399 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12400 if (!NeedToPush) 12401 return; 12402 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12403 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12404 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12405 } 12406 12407 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12408 if (!NeedToPush) 12409 return; 12410 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12411 } 12412 12413 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12414 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12415 12416 return llvm::any_of( 12417 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12418 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12419 } 12420 12421 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12422 const OMPExecutableDirective &S, 12423 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12424 const { 12425 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12426 // Vars in target/task regions must be excluded completely. 12427 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12428 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12429 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12430 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12431 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12432 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12433 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12434 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12435 } 12436 } 12437 // Exclude vars in private clauses. 
12438 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12439 for (const Expr *Ref : C->varlists()) { 12440 if (!Ref->getType()->isScalarType()) 12441 continue; 12442 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12443 if (!DRE) 12444 continue; 12445 NeedToCheckForLPCs.insert(DRE->getDecl()); 12446 } 12447 } 12448 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12449 for (const Expr *Ref : C->varlists()) { 12450 if (!Ref->getType()->isScalarType()) 12451 continue; 12452 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12453 if (!DRE) 12454 continue; 12455 NeedToCheckForLPCs.insert(DRE->getDecl()); 12456 } 12457 } 12458 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12459 for (const Expr *Ref : C->varlists()) { 12460 if (!Ref->getType()->isScalarType()) 12461 continue; 12462 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12463 if (!DRE) 12464 continue; 12465 NeedToCheckForLPCs.insert(DRE->getDecl()); 12466 } 12467 } 12468 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12469 for (const Expr *Ref : C->varlists()) { 12470 if (!Ref->getType()->isScalarType()) 12471 continue; 12472 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12473 if (!DRE) 12474 continue; 12475 NeedToCheckForLPCs.insert(DRE->getDecl()); 12476 } 12477 } 12478 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12479 for (const Expr *Ref : C->varlists()) { 12480 if (!Ref->getType()->isScalarType()) 12481 continue; 12482 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12483 if (!DRE) 12484 continue; 12485 NeedToCheckForLPCs.insert(DRE->getDecl()); 12486 } 12487 } 12488 for (const Decl *VD : NeedToCheckForLPCs) { 12489 for (const LastprivateConditionalData &Data : 12490 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12491 if (Data.DeclToUniqueName.count(VD) > 0) { 12492 if (!Data.Disabled) 12493 
NeedToAddForLPCsAsDisabled.insert(VD); 12494 break; 12495 } 12496 } 12497 } 12498 } 12499 12500 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12501 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12502 : CGM(CGF.CGM), 12503 Action((CGM.getLangOpts().OpenMP >= 50 && 12504 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12505 [](const OMPLastprivateClause *C) { 12506 return C->getKind() == 12507 OMPC_LASTPRIVATE_conditional; 12508 })) 12509 ? ActionToDo::PushAsLastprivateConditional 12510 : ActionToDo::DoNotPush) { 12511 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12512 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12513 return; 12514 assert(Action == ActionToDo::PushAsLastprivateConditional && 12515 "Expected a push action."); 12516 LastprivateConditionalData &Data = 12517 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12518 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12519 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12520 continue; 12521 12522 for (const Expr *Ref : C->varlists()) { 12523 Data.DeclToUniqueName.insert(std::make_pair( 12524 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12525 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12526 } 12527 } 12528 Data.IVLVal = IVLVal; 12529 Data.Fn = CGF.CurFn; 12530 } 12531 12532 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12533 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12534 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12535 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12536 if (CGM.getLangOpts().OpenMP < 50) 12537 return; 12538 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12539 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12540 if (!NeedToAddForLPCsAsDisabled.empty()) { 12541 Action = ActionToDo::DisableLastprivateConditional; 12542 
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Record the decls with empty unique names; Data.Disabled marks this as
    // a disable-only frame.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Factory for the disable-form RAII above.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop exactly the frame kind this RAII pushed (if any).
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Allocates (or reuses) the per-function { value, Fired flag } record used
/// to track a lastprivate(conditional) variable, resets the Fired flag to 0,
/// and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the implicit record and a temp for
    // it. (The record name "lasprivate.conditional" is kept as-is; it is an
    // internal IR-level name.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: the variable has not been written yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-first; a Disabled frame suppresses the match.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const
         CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse only into glvalue sub-expressions — prvalue children cannot
    // name the variable as a store target.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits the "keep the value from the latest iteration" update for a
/// lastprivate(conditional) variable: compares a global last-iteration
/// counter against the current IV and, if not newer, stores the private
/// value into the global copy — under a named critical section unless
/// compiling in simd-only mode.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Comparison signedness follows the IV type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS references a tracked lastprivate(conditional) variable, emits
/// the bookkeeping for the store: either the critical-section update above
/// (same function) or an atomic set of the tracking struct's Fired flag
/// (store happened in an inner outlined region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
12781 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12782 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12783 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12784 "Lastprivate conditional is not found in outer region."); 12785 QualType StructTy = std::get<0>(It->getSecond()); 12786 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12787 LValue PrivLVal = CGF.EmitLValue(FoundE); 12788 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12789 PrivLVal.getAddress(CGF), 12790 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12791 CGF.ConvertTypeForMem(StructTy)); 12792 LValue BaseLVal = 12793 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12794 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12795 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12796 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12797 FiredLVal, llvm::AtomicOrdering::Unordered, 12798 /*IsVolatile=*/true, /*isInit=*/false); 12799 return; 12800 } 12801 12802 // Private address of the lastprivate conditional in the current context. 
12803 // priv_a 12804 LValue LVal = CGF.EmitLValue(FoundE); 12805 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12806 FoundE->getExprLoc()); 12807 } 12808 12809 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12810 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12811 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12812 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12813 return; 12814 auto Range = llvm::reverse(LastprivateConditionalStack); 12815 auto It = llvm::find_if( 12816 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12817 if (It == Range.end() || It->Fn != CGF.CurFn) 12818 return; 12819 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12820 assert(LPCI != LastprivateConditionalToTypes.end() && 12821 "Lastprivates must be registered already."); 12822 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12823 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12824 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12825 for (const auto &Pair : It->DeclToUniqueName) { 12826 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12827 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12828 continue; 12829 auto I = LPCI->getSecond().find(Pair.first); 12830 assert(I != LPCI->getSecond().end() && 12831 "Lastprivate must be rehistered already."); 12832 // bool Cmp = priv_a.Fired != 0; 12833 LValue BaseLVal = std::get<3>(I->getSecond()); 12834 LValue FiredLVal = 12835 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12836 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12837 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12838 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12839 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12840 // if (Cmp) { 12841 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12842 CGF.EmitBlock(ThenBB); 
12843 Address Addr = CGF.GetAddrOfLocalVar(VD); 12844 LValue LVal; 12845 if (VD->getType()->isReferenceType()) 12846 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12847 AlignmentSource::Decl); 12848 else 12849 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12850 AlignmentSource::Decl); 12851 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12852 D.getBeginLoc()); 12853 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12854 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12855 // } 12856 } 12857 } 12858 12859 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12860 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12861 SourceLocation Loc) { 12862 if (CGF.getLangOpts().OpenMP < 50) 12863 return; 12864 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12865 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12866 "Unknown lastprivate conditional variable."); 12867 StringRef UniqueName = It->second; 12868 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12869 // The variable was not updated in the region - exit. 
  if (!GV)
    return;
  // Copy the last recorded value from the internal global back into the
  // private copy.
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides. In SIMD-only mode these runtime entry points
// must never be reached (each traps via llvm_unreachable), except where a
// simple default behavior exists.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Simple reductions are supported even in SIMD-only mode: delegate to the
// base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Always false in SIMD-only mode — presumably meaning the runtime does not
  // take over emission and the caller emits the global normally; verify
  // against the base-class contract.
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}