//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Every instance is a CGCapturedStmtInfo whose kind is CR_OpenMP; the
/// concrete flavour of the region is recorded in RegionKind and is what the
/// derived classes' classof() implementations test.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Flavour of the region (see CGOpenMPRegionKind).
  /// \param CodeGen Code generation sequence stored for the region.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Value later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; only the
  /// CR_OpenMP kind is passed to the base class.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the flavour of this region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches any captured statement info whose kind
  /// is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Flavour of the region, fixed at construction.
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence supplied at construction.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// Flag reported by hasCancel().
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
///
/// Instances always use the ParallelOutlinedRegion region kind and carry the
/// thread id variable and the helper name supplied by the creator.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (checked by an assertion).
  /// \param CodeGen Code generation sequence for the region.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Value later reported by hasCancel().
  /// \param HelperName Name returned by getHelperName().
  // NOTE(review): HelperName is kept as a StringRef, so the referenced
  // characters presumably have to outlive this object - confirm at callers.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper, as supplied at construction.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 148 public: 149 class UntiedTaskActionTy final : public PrePostActionTy { 150 bool Untied; 151 const VarDecl *PartIDVar; 152 const RegionCodeGenTy UntiedCodeGen; 153 llvm::SwitchInst *UntiedSwitch = nullptr; 154 155 public: 156 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 157 const RegionCodeGenTy &UntiedCodeGen) 158 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 159 void Enter(CodeGenFunction &CGF) override { 160 if (Untied) { 161 // Emit task switching point. 162 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 llvm::Value *Res = 166 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 167 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 168 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 169 CGF.EmitBlock(DoneBB); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 173 CGF.Builder.GetInsertBlock()); 174 emitUntiedSwitch(CGF); 175 } 176 } 177 void emitUntiedSwitch(CodeGenFunction &CGF) const { 178 if (Untied) { 179 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 180 CGF.GetAddrOfLocalVar(PartIDVar), 181 PartIDVar->getType()->castAs<PointerType>()); 182 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 183 PartIdLVal); 184 UntiedCodeGen(CGF); 185 CodeGenFunction::JumpDest CurPoint = 186 CGF.getJumpDestInCurrentScope(".untied.next."); 187 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 188 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 189 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 190 CGF.Builder.GetInsertBlock()); 191 CGF.EmitBranchThroughCleanup(CurPoint); 192 CGF.EmitBlock(CurPoint.getBlock()); 193 } 194 } 195 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 196 }; 197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 198 const VarDecl *ThreadIDVar, 199 const RegionCodeGenTy &CodeGen, 200 OpenMPDirectiveKind Kind, bool HasCancel, 201 const UntiedTaskActionTy &Action) 202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 203 ThreadIDVar(ThreadIDVar), Action(Action) { 204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 205 } 206 207 /// Get a variable or parameter for storing global thread id 208 /// inside OpenMP construct. 209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 210 211 /// Get an LValue for the current ThreadID variable. 212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 213 214 /// Get the name of the capture helper. 215 StringRef getHelperName() const override { return ".omp_outlined."; } 216 217 void emitUntiedSwitch(CodeGenFunction &CGF) override { 218 Action.emitUntiedSwitch(CGF); 219 } 220 221 static bool classof(const CGCapturedStmtInfo *Info) { 222 return CGOpenMPRegionInfo::classof(Info) && 223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 224 TaskOutlinedRegion; 225 } 226 227 private: 228 /// A variable or parameter storing global thread id for OpenMP 229 /// constructs. 230 const VarDecl *ThreadIDVar; 231 /// Action for emitting code for untied tasks. 232 const UntiedTaskActionTy &Action; 233 }; 234 235 /// API for inlined captured statement code generation in OpenMP 236 /// constructs. 237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 238 public: 239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 240 const RegionCodeGenTy &CodeGen, 241 OpenMPDirectiveKind Kind, bool HasCancel) 242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 243 OldCSI(OldCSI), 244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 245 246 // Retrieve the value of the context parameter. 
247 llvm::Value *getContextValue() const override { 248 if (OuterRegionInfo) 249 return OuterRegionInfo->getContextValue(); 250 llvm_unreachable("No context value for inlined OpenMP region"); 251 } 252 253 void setContextValue(llvm::Value *V) override { 254 if (OuterRegionInfo) { 255 OuterRegionInfo->setContextValue(V); 256 return; 257 } 258 llvm_unreachable("No context value for inlined OpenMP region"); 259 } 260 261 /// Lookup the captured field decl for a variable. 262 const FieldDecl *lookup(const VarDecl *VD) const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->lookup(VD); 265 // If there is no outer outlined region,no need to lookup in a list of 266 // captured variables, we can use the original one. 267 return nullptr; 268 } 269 270 FieldDecl *getThisFieldDecl() const override { 271 if (OuterRegionInfo) 272 return OuterRegionInfo->getThisFieldDecl(); 273 return nullptr; 274 } 275 276 /// Get a variable or parameter for storing global thread id 277 /// inside OpenMP construct. 278 const VarDecl *getThreadIDVariable() const override { 279 if (OuterRegionInfo) 280 return OuterRegionInfo->getThreadIDVariable(); 281 return nullptr; 282 } 283 284 /// Get an LValue for the current ThreadID variable. 285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 286 if (OuterRegionInfo) 287 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 288 llvm_unreachable("No LValue for inlined OpenMP construct"); 289 } 290 291 /// Get the name of the capture helper. 
292 StringRef getHelperName() const override { 293 if (auto *OuterRegionInfo = getOldCSI()) 294 return OuterRegionInfo->getHelperName(); 295 llvm_unreachable("No helper name for inlined OpenMP construct"); 296 } 297 298 void emitUntiedSwitch(CodeGenFunction &CGF) override { 299 if (OuterRegionInfo) 300 OuterRegionInfo->emitUntiedSwitch(CGF); 301 } 302 303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 304 305 static bool classof(const CGCapturedStmtInfo *Info) { 306 return CGOpenMPRegionInfo::classof(Info) && 307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 308 } 309 310 ~CGOpenMPInlinedRegionInfo() override = default; 311 312 private: 313 /// CodeGen info about outer OpenMP region. 314 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 315 CGOpenMPRegionInfo *OuterRegionInfo; 316 }; 317 318 /// API for captured statement code generation in OpenMP target 319 /// constructs. For this captures, implicit parameters are used instead of the 320 /// captured fields. The name of the target region has to be unique in a given 321 /// application so it is provided by the client, because only the client has 322 /// the information to generate that. 323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 324 public: 325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 326 const RegionCodeGenTy &CodeGen, StringRef HelperName) 327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 328 /*HasCancel=*/false), 329 HelperName(HelperName) {} 330 331 /// This is unused for target regions because each starts executing 332 /// with a single thread. 333 const VarDecl *getThreadIDVariable() const override { return nullptr; } 334 335 /// Get the name of the capture helper. 
336 StringRef getHelperName() const override { return HelperName; } 337 338 static bool classof(const CGCapturedStmtInfo *Info) { 339 return CGOpenMPRegionInfo::classof(Info) && 340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 341 } 342 343 private: 344 StringRef HelperName; 345 }; 346 347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 348 llvm_unreachable("No codegen for expressions"); 349 } 350 /// API for generation of expressions captured in a innermost OpenMP 351 /// region. 352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 353 public: 354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 356 OMPD_unknown, 357 /*HasCancel=*/false), 358 PrivScope(CGF) { 359 // Make sure the globals captured in the provided statement are local by 360 // using the privatization logic. We assume the same variable is not 361 // captured more than once. 362 for (const auto &C : CS.captures()) { 363 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 364 continue; 365 366 const VarDecl *VD = C.getCapturedVar(); 367 if (VD->isLocalVarDeclOrParm()) 368 continue; 369 370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 371 /*RefersToEnclosingVariableOrCapture=*/false, 372 VD->getType().getNonReferenceType(), VK_LValue, 373 C.getLocation()); 374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); 375 } 376 (void)PrivScope.Privatize(); 377 } 378 379 /// Lookup the captured field decl for a variable. 380 const FieldDecl *lookup(const VarDecl *VD) const override { 381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 382 return FD; 383 return nullptr; 384 } 385 386 /// Emit the captured statement body. 
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 388 llvm_unreachable("No body for expressions"); 389 } 390 391 /// Get a variable or parameter for storing global thread id 392 /// inside OpenMP construct. 393 const VarDecl *getThreadIDVariable() const override { 394 llvm_unreachable("No thread id for expressions"); 395 } 396 397 /// Get the name of the capture helper. 398 StringRef getHelperName() const override { 399 llvm_unreachable("No helper name for expressions"); 400 } 401 402 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 403 404 private: 405 /// Private scope to capture global variables. 406 CodeGenFunction::OMPPrivateScope PrivScope; 407 }; 408 409 /// RAII for emitting code of OpenMP constructs. 410 class InlinedOpenMPRegionRAII { 411 CodeGenFunction &CGF; 412 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 413 FieldDecl *LambdaThisCaptureField = nullptr; 414 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 415 bool NoInheritance = false; 416 417 public: 418 /// Constructs region for combined constructs. 419 /// \param CodeGen Code generation sequence for combined directives. Includes 420 /// a list of functions used for code generation of implicitly inlined 421 /// regions. 422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 423 OpenMPDirectiveKind Kind, bool HasCancel, 424 bool NoInheritance = true) 425 : CGF(CGF), NoInheritance(NoInheritance) { 426 // Start emission for the construct. 427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 429 if (NoInheritance) { 430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 431 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 432 CGF.LambdaThisCaptureField = nullptr; 433 BlockInfo = CGF.BlockInfo; 434 CGF.BlockInfo = nullptr; 435 } 436 } 437 438 ~InlinedOpenMPRegionRAII() { 439 // Restore original CapturedStmtInfo only if we're done with code emission. 
440 auto *OldCSI = 441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 442 delete CGF.CapturedStmtInfo; 443 CGF.CapturedStmtInfo = OldCSI; 444 if (NoInheritance) { 445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 446 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 447 CGF.BlockInfo = BlockInfo; 448 } 449 } 450 }; 451 452 /// Values for bit flags used in the ident_t to describe the fields. 453 /// All enumeric elements are named and described in accordance with the code 454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 455 enum OpenMPLocationFlags : unsigned { 456 /// Use trampoline for internal microtask. 457 OMP_IDENT_IMD = 0x01, 458 /// Use c-style ident structure. 459 OMP_IDENT_KMPC = 0x02, 460 /// Atomic reduction option for kmpc_reduce. 461 OMP_ATOMIC_REDUCE = 0x10, 462 /// Explicit 'barrier' directive. 463 OMP_IDENT_BARRIER_EXPL = 0x20, 464 /// Implicit barrier in code. 465 OMP_IDENT_BARRIER_IMPL = 0x40, 466 /// Implicit barrier in 'for' directive. 467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 468 /// Implicit barrier in 'sections' directive. 469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 470 /// Implicit barrier in 'single' directive. 471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 472 /// Call of __kmp_for_static_init for static loop. 473 OMP_IDENT_WORK_LOOP = 0x200, 474 /// Call of __kmp_for_static_init for sections. 475 OMP_IDENT_WORK_SECTIONS = 0x400, 476 /// Call of __kmp_for_static_init for distribute. 477 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 479 }; 480 481 namespace { 482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 483 /// Values for bit flags for marking which requires clauses have been used. 484 enum OpenMPOffloadingRequiresDirFlags : int64_t { 485 /// flag undefined. 486 OMP_REQ_UNDEFINED = 0x000, 487 /// no requires clause present. 488 OMP_REQ_NONE = 0x001, 489 /// reverse_offload clause. 
490 OMP_REQ_REVERSE_OFFLOAD = 0x002, 491 /// unified_address clause. 492 OMP_REQ_UNIFIED_ADDRESS = 0x004, 493 /// unified_shared_memory clause. 494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 495 /// dynamic_allocators clause. 496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 498 }; 499 500 enum OpenMPOffloadingReservedDeviceIDs { 501 /// Device ID if the device was not defined, runtime should get it 502 /// from environment variables in the spec. 503 OMP_DEVICEID_UNDEF = -1, 504 }; 505 } // anonymous namespace 506 507 /// Describes ident structure that describes a source location. 508 /// All descriptions are taken from 509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 510 /// Original structure: 511 /// typedef struct ident { 512 /// kmp_int32 reserved_1; /**< might be used in Fortran; 513 /// see above */ 514 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 515 /// KMP_IDENT_KMPC identifies this union 516 /// member */ 517 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 518 /// see above */ 519 ///#if USE_ITT_BUILD 520 /// /* but currently used for storing 521 /// region-specific ITT */ 522 /// /* contextual information. */ 523 ///#endif /* USE_ITT_BUILD */ 524 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 525 /// C++ */ 526 /// char const *psource; /**< String describing the source location. 527 /// The string is composed of semi-colon separated 528 // fields which describe the source file, 529 /// the function and a pair of line numbers that 530 /// delimit the construct. 531 /// */ 532 /// } ident_t; 533 enum IdentFieldIndex { 534 /// might be used in Fortran 535 IdentField_Reserved_1, 536 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
537 IdentField_Flags, 538 /// Not really used in Fortran any more 539 IdentField_Reserved_2, 540 /// Source[4] in Fortran, do not use for C++ 541 IdentField_Reserved_3, 542 /// String describing the source location. The string is composed of 543 /// semi-colon separated fields which describe the source file, the function 544 /// and a pair of line numbers that delimit the construct. 545 IdentField_PSource 546 }; 547 548 /// Schedule types for 'omp for' loops (these enumerators are taken from 549 /// the enum sched_type in kmp.h). 550 enum OpenMPSchedType { 551 /// Lower bound for default (unordered) versions. 552 OMP_sch_lower = 32, 553 OMP_sch_static_chunked = 33, 554 OMP_sch_static = 34, 555 OMP_sch_dynamic_chunked = 35, 556 OMP_sch_guided_chunked = 36, 557 OMP_sch_runtime = 37, 558 OMP_sch_auto = 38, 559 /// static with chunk adjustment (e.g., simd) 560 OMP_sch_static_balanced_chunked = 45, 561 /// Lower bound for 'ordered' versions. 562 OMP_ord_lower = 64, 563 OMP_ord_static_chunked = 65, 564 OMP_ord_static = 66, 565 OMP_ord_dynamic_chunked = 67, 566 OMP_ord_guided_chunked = 68, 567 OMP_ord_runtime = 69, 568 OMP_ord_auto = 70, 569 OMP_sch_default = OMP_sch_static, 570 /// dist_schedule types 571 OMP_dist_sch_static_chunked = 91, 572 OMP_dist_sch_static = 92, 573 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 574 /// Set if the monotonic schedule modifier was present. 575 OMP_sch_modifier_monotonic = (1 << 29), 576 /// Set if the nonmonotonic schedule modifier was present. 577 OMP_sch_modifier_nonmonotonic = (1 << 30), 578 }; 579 580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 581 /// region. 
582 class CleanupTy final : public EHScopeStack::Cleanup { 583 PrePostActionTy *Action; 584 585 public: 586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 588 if (!CGF.HaveInsertPoint()) 589 return; 590 Action->Exit(CGF); 591 } 592 }; 593 594 } // anonymous namespace 595 596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 597 CodeGenFunction::RunCleanupsScope Scope(CGF); 598 if (PrePostAction) { 599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 600 Callback(CodeGen, CGF, *PrePostAction); 601 } else { 602 PrePostActionTy Action; 603 Callback(CodeGen, CGF, Action); 604 } 605 } 606 607 /// Check if the combiner is a call to UDR combiner and if it is so return the 608 /// UDR decl used for reduction. 609 static const OMPDeclareReductionDecl * 610 getReductionInit(const Expr *ReductionOp) { 611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 613 if (const auto *DRE = 614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 616 return DRD; 617 return nullptr; 618 } 619 620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 621 const OMPDeclareReductionDecl *DRD, 622 const Expr *InitOp, 623 Address Private, Address Original, 624 QualType Ty) { 625 if (DRD->getInitializer()) { 626 std::pair<llvm::Function *, llvm::Function *> Reduction = 627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 628 const auto *CE = cast<CallExpr>(InitOp); 629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 632 const auto *LHSDRE = 633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 634 const auto *RHSDRE = 635 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); 639 (void)PrivateScope.Privatize(); 640 RValue Func = RValue::get(Reduction.second); 641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 642 CGF.EmitIgnoredExpr(InitOp); 643 } else { 644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 646 auto *GV = new llvm::GlobalVariable( 647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 648 llvm::GlobalValue::PrivateLinkage, Init, Name); 649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 650 RValue InitRVal; 651 switch (CGF.getEvaluationKind(Ty)) { 652 case TEK_Scalar: 653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 654 break; 655 case TEK_Complex: 656 InitRVal = 657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 658 break; 659 case TEK_Aggregate: { 660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 663 /*IsInitializer=*/false); 664 return; 665 } 666 } 667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 670 /*IsInitializer=*/false); 671 } 672 } 673 674 /// Emit initialization of arrays of complex types. 675 /// \param DestAddr Address of the array. 676 /// \param Type Type of array. 677 /// \param Init Initial expression of array. 678 /// \param SrcAddr Address of the original array. 
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 if (DRD) 691 SrcAddr = 692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 693 694 llvm::Value *SrcBegin = nullptr; 695 if (DRD) 696 SrcBegin = SrcAddr.getPointer(); 697 llvm::Value *DestBegin = DestAddr.getPointer(); 698 // Cast from pointer to array type to pointer to single element. 699 llvm::Value *DestEnd = 700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, SrcAddr.getElementType(), 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, DestAddr.getElementType(), 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 746 "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 753 "omp.arraycpy.dest.element"); 754 // Check whether we've reached the end. 
755 llvm::Value *Done = 756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 759 760 // Done. 761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 762 } 763 764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 765 return CGF.EmitOMPSharedLValue(E); 766 } 767 768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 769 const Expr *E) { 770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 772 return LValue(); 773 } 774 775 void ReductionCodeGen::emitAggregateInitialization( 776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 777 const OMPDeclareReductionDecl *DRD) { 778 // Emit VarDecl with copy init for arrays. 779 // Get the address of the original variable captured in current 780 // captured region. 781 const auto *PrivateVD = 782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 783 bool EmitDeclareReductionInit = 784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 786 EmitDeclareReductionInit, 787 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 788 : PrivateVD->getInit(), 789 DRD, SharedAddr); 790 } 791 792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 793 ArrayRef<const Expr *> Origs, 794 ArrayRef<const Expr *> Privates, 795 ArrayRef<const Expr *> ReductionOps) { 796 ClausesData.reserve(Shareds.size()); 797 SharedAddresses.reserve(Shareds.size()); 798 Sizes.reserve(Shareds.size()); 799 BaseDecls.reserve(Shareds.size()); 800 const auto *IOrig = Origs.begin(); 801 const auto *IPriv = Privates.begin(); 802 const auto *IRed = ReductionOps.begin(); 803 for (const Expr *Ref : Shareds) { 804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 805 std::advance(IOrig, 1); 806 std::advance(IPriv, 1); 807 std::advance(IRed, 1); 808 } 809 } 810 811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 813 "Number of generated lvalues must be exactly N."); 814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 816 SharedAddresses.emplace_back(First, Second); 817 if (ClausesData[N].Shared == ClausesData[N].Ref) { 818 OrigAddresses.emplace_back(First, Second); 819 } else { 820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 822 OrigAddresses.emplace_back(First, Second); 823 } 824 } 825 826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 827 QualType PrivateType = getPrivateType(N); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value *SizeInChars; 837 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); 838 auto *ElemSizeOf = 
llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count of a section: (upper bound pointer - lower bound pointer)
    // measured in elements, plus one; the byte size is derived from it.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise start from the byte size of the type and divide by the
    // element size to obtain the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the private type's size expression to the computed element count
  // while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Variant of emitAggregateType that takes an already computed element count.
///
/// For a non variably modified private type this is a no-op; it only asserts
/// that \p Size and the recorded element count for item \p N are both null.
/// Otherwise \p Size is bound to the private type's size expression and the
/// variably modified type is emitted. Unlike the two-argument overload,
/// nothing is appended to Sizes.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
///
/// Three cases, tried in order:
///  - the private variable has array type: \p DefaultInit is invoked first
///    only when a declare-reduction initializer exists, then the aggregate
///    initialization path is used;
///  - a declare reduction exists and either has an initializer or the private
///    variable has none: \p DefaultInit is invoked, then the reduction
///    initializer is emitted;
///  - otherwise: if \p DefaultInit returns false and the private variable has
///    a non-trivial initializer, that initializer is emitted into
///    \p PrivateAddr.
/// The result of \p DefaultInit is deliberately ignored in the first two
/// cases.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private type of reduction item \p N has a destruction
/// kind other than DK_none, i.e. its private copy needs a cleanup.
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N located
/// at \p PrivateAddr. Does nothing when needCleanups(N) is false.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    // View the address with the private type's memory representation before
    // registering the destroy cleanup.
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through each pointer or
/// reference level until the type is the same as \p ElTy (or is no longer a
/// pointer/reference), and return an lvalue at the resulting address with its
/// element type cast to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr so that it can stand in for a base variable of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (until the type is the same
/// as \p ElTy) a memory temporary is created, and each temporary's pointer is
/// stored into the temporary of the enclosing level. \p Addr is stored into
/// the innermost temporary and the outermost one is returned. If \p BaseTy has
/// no such level, \p OriginalBaseAddress is returned with its pointer replaced
/// by \p Addr cast to the original pointer type.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Chain this level's temporary into the previous one; the first temporary
    // created is remembered as the outermost.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

/// Find the variable underlying an array section or array subscript
/// expression.
///
/// For an OMPArraySectionExpr, nested sections and then nested subscripts are
/// stripped from the base; for an ArraySubscriptExpr, nested subscripts are
/// stripped. The remaining base must be a DeclRefExpr to a VarDecl (enforced
/// by cast<>); it is stored in \p DE and its VarDecl is returned. For any
/// other expression kind nullptr is returned and \p DE is left untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD =
cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust the private address of reduction item \p N so that it can be used
/// in place of the item's base variable, and record that base variable in
/// BaseDecls.
///
/// When the item is an array section or subscript (getBaseDecl finds a base
/// variable), the element offset "base begin - shared item" is computed, the
/// same offset is applied to the private pointer, and the result is wrapped
/// by castToBase. Otherwise the item must be a plain DeclRefExpr to a VarDecl
/// and \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned by getBaseDecl when it returns a non-null declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Adjustment = (base begin) - (shared item), in units of the shared
    // element type.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Ptr = (private item) + Adjustment.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item \p N is associated with a declare reduction
/// that provides an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Return the lvalue obtained by loading through the thread id variable,
/// which is treated as a pointer here (its type is cast to PointerType).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body by running the stored CodeGen callback between a
/// pushTerminate/popTerminate pair on the EH stack. Does nothing if there is
/// no insertion point. The profile counter for \p S is incremented when \p S
/// is non-null.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
// The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Return an lvalue for the thread id variable itself. In contrast to
/// CGOpenMPRegionInfo::getThreadIDVariableLValue, no load through a pointer
/// is emitted: the local variable's address is used directly with the
/// variable's own type.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable field of type \p FieldTy without a
/// bit width or in-class initializer, add it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Construct the runtime helper for module \p CGM.
///
/// \p FirstSeparator and \p Separator are stored for later use by getName().
/// Besides initializing members, this sets up the critical-name type as an
/// array of 8 i32, initializes the OpenMPIRBuilder and loads offload info
/// metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Release per-module state: clears InternalVars and erases recorded
/// non-target global variables that ended up as unused declarations.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
for (const auto &Data : EmittedNonTargetVariables) {
    // Skip entries whose value no longer points to a live value.
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only declarations without any use are erased.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a single name. The first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction over type \p Ty.
///
/// The function takes two restrict-qualified pointers to \p Ty, returns void
/// and has internal linkage. Inside it, \p Out and \p In are privatized to
/// the pointees of the first and second parameter respectively. For an
/// initializer (\p IsCombiner false) whose \p Out variable has a non-trivial
/// init expression, that expression is emitted into \p Out first. Then
/// \p CombinerInitializer, if non-null, is emitted as an ignored expression.
/// When optimizing, the function is marked always-inline.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  // (the out parameter is pushed first below, the in parameter second)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The trailing empty part makes getName() append one more separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner and, if the declaration has an initializer, the
/// initializer helper for the user-defined reduction \p D and cache them in
/// UDRMap. Does nothing if \p D is already cached. When \p CGF is non-null,
/// \p D is also recorded under the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // The initializer expression itself is only passed down for the CallInit
    // kind; for other kinds nullptr is passed instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) function pair for the user-defined
/// reduction \p D, emitting the pair on first request. On that path no
/// CodeGenFunction is passed, so \p D is not recorded for any function.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback onto \p OMPBuilder (when it is non-null);
  // the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
// The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference, and the insert point guard
    // restores the builder's previous insertion point when the callback
    // returns.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder to pop from on destruction; may be null, in which case neither
  // the push nor the pop happens.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Emit the outlined function for the captured statement \p CS of a parallel
/// or teams region of directive \p D.
///
/// \p ThreadIDVar must have pointer type. HasCancel is taken from \p D when
/// it is one of the parallel-based directive classes tested below and is
/// false for every other directive. A finalization callback is pushed on the
/// OpenMPIRBuilder for the duration of the emission.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Emit the outlined function for the OMPD_parallel captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for the OMPD_teams captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for a task or taskloop region of directive
/// \p D.
///
/// \p ThreadIDVar must not have pointer type. The captured statement is that
/// of OMPD_taskloop for taskloop directives and of OMPD_task otherwise.
/// \p NumberOfParts is written only when \p Tied is false.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, tid, task) where the task pointer is loaded from
  // TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // HasCancel stays false for directive classes not listed here.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append to \p Fields one constant per field of \p RD, taken in order from
/// \p Data, inserting null constants for LLVM struct elements that lie
/// between consecutive fields.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD,
const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is not referenced anywhere else in this function.
  ConstantInitBuilder CIBuilder(CGM);
  // Data is consumed one element per field; nothing here checks that it has
  // as many elements as RD has fields.
  const auto *DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable named \p Name holding a struct of record type
/// \p Ty whose fields are initialized from \p Data. The global uses the
/// alignment the AST context reports for a global of type \p Ty; \p Args are
/// forwarded to ConstantStructBuilder::finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and add it as
/// the next element of the aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Create the "svcpt" marker instruction for the current function and store
/// it as the function's service insertion point. The marker is a bitcast of
/// an undef i32. With \p AtCurrentPoint it is appended to the current insert
/// block; otherwise it is inserted right after the alloca insertion point.
/// The insertion point must not be set already.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef =
llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the service insertion point marker of the current function, if one
/// is set, and reset the stored pointer. Note that the lookup creates a map
/// entry for the function when none exists yet.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    // Clear the stored pointer before erasing the instruction.
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format the presumed location of \p Loc as
/// ";<file>;<qualified function name>;<line>;<column>;;" into \p Buffer.
/// The function name part is empty when the current declaration is not a
/// FunctionDecl. The returned StringRef refers to \p Buffer's storage, so the
/// buffer must outlive every use of the result.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Return the ident value describing source location \p Loc, combined with
/// \p Flags and the default reserved-2 flags.
///
/// The default source location string is used when debug info is disabled or
/// \p Loc is invalid; otherwise the string is built from the enclosing
/// function's qualified name and the presumed file, line and column.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column =
PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

/// Return the global thread id value to use at the current point of the
/// function being emitted.
///
/// Strategy, in order:
///  1. With the OpenMPIRBuilder language option enabled, delegate entirely to
///     the builder.
///  2. Reuse a thread id already cached for this function.
///  3. Inside an OpenMP region that has a thread id variable, load it when
///     one of the conditions below holds; the load is cached only when it is
///     emitted in the entry block.
///  4. Otherwise emit a call to __kmpc_global_thread_num at the function's
///     service insertion point and cache the call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    // Backing storage for the location string built below.
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable if no landing pad is required, or C++
      // exceptions are off, or we are emitting into the entry block, or the
      // lvalue's pointer is not an instruction, or that instruction lives in
      // the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insertion point on scope exit; the call
  // itself is emitted at the service insertion point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function state kept for the function that \p CGF just
/// finished emitting: the thread id / service insertion point entry, the
/// user-defined reductions and mappers recorded for the function (which are
/// also removed from UDRMap / UDMMap), and the lastprivate-conditional and
/// untied-task-stack entries.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

/// Return the ident pointer type provided by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Return a pointer to the kmpc_micro function type, creating and caching the
/// function type on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    // The trailing 'true' makes the function type variadic.
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Return the runtime entry point for static loop initialization matching the
/// induction variable width and signedness.
///
/// \p IVSize must be 32 or 64. The name is
/// "__kmpc_distribute_static_init_{4,4u,8,8u}" when \p IsGPUDistribute is set
/// and "__kmpc_for_static_init_{4,4u,8,8u}" otherwise; "4"/"8" select the
/// 32/64-bit variant and the "u" suffix the unsigned one. The function
/// returns void.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the runtime entry point "__kmpc_dispatch_init_{4,4u,8,8u}" matching
/// the induction variable width (\p IVSize, 32 or 64) and signedness. The
/// function returns void.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the runtime entry point "__kmpc_dispatch_fini_{4,4u,8,8u}" matching
/// the induction variable width (\p IVSize, 32 or 64) and signedness. The
/// function takes only the location and thread id and returns void.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the runtime entry point "__kmpc_dispatch_next_{4,4u,8,8u}" matching
/// the induction variable width (\p IVSize, 32 or 64) and signedness. Unlike
/// the init/fini entry points, this function returns an i32.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1626 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1627 llvm::Type *TypeParams[] = { 1628 getIdentTyPointerTy(), // loc 1629 CGM.Int32Ty, // tid 1630 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1631 PtrTy, // p_lower 1632 PtrTy, // p_upper 1633 PtrTy // p_stride 1634 }; 1635 auto *FnTy = 1636 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1637 return CGM.CreateRuntimeFunction(FnTy, Name); 1638 } 1639 1640 /// Obtain information that uniquely identifies a target entry. This 1641 /// consists of the file and device IDs as well as line number associated with 1642 /// the relevant entry source location. 1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1644 unsigned &DeviceID, unsigned &FileID, 1645 unsigned &LineNum) { 1646 SourceManager &SM = C.getSourceManager(); 1647 1648 // The loc should be always valid and have a file ID (the user cannot use 1649 // #pragma directives in macros) 1650 1651 assert(Loc.isValid() && "Source location is expected to be always valid."); 1652 1653 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1654 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1655 1656 llvm::sys::fs::UniqueID ID; 1657 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1658 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1659 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1662 << PLoc.getFilename() << EC.message(); 1663 } 1664 1665 DeviceID = ID.getDevice(); 1666 FileID = ID.getFile(); 1667 LineNum = PLoc.getLine(); 1668 } 1669 1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1671 if (CGM.getLangOpts().OpenMPSimd) 1672 return Address::invalid(); 1673 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1674 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'link' variables, and 'to' variables under
  // 'requires unified_shared_memory', are accessed through a reference
  // pointer; everything else yields an invalid address.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: <mangled name>[_<file id>]_decl_tgt_ref_ptr.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Variables that are not externally visible get the file ID appended to
      // their pointer name.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      // First request for this variable: create the weak pointer global.
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side initialises the pointer with the variable's
      // address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

// Return the global that serves as the runtime's cache slot for the
// threadprivate variable VD; it is named after VD's mangled name plus a
// "cache" suffix. Must not be used when threadprivates are lowered to native
// TLS (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1714 std::string Suffix = getName({"cache", ""}); 1715 return getOrCreateInternalVariable( 1716 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1717 } 1718 1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1720 const VarDecl *VD, 1721 Address VDAddr, 1722 SourceLocation Loc) { 1723 if (CGM.getLangOpts().OpenMPUseTLS && 1724 CGM.getContext().getTargetInfo().isTLSSupported()) 1725 return VDAddr; 1726 1727 llvm::Type *VarTy = VDAddr.getElementType(); 1728 llvm::Value *Args[] = { 1729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1730 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1731 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1732 getOrCreateThreadPrivateCache(VD)}; 1733 return Address( 1734 CGF.EmitRuntimeCall( 1735 OMPBuilder.getOrCreateRuntimeFunction( 1736 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1737 Args), 1738 CGF.Int8Ty, VDAddr.getAlignment()); 1739 } 1740 1741 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1742 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1743 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1744 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1745 // library. 1746 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1747 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1748 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1749 OMPLoc); 1750 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1751 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

// Emit the constructor/destructor helpers for the threadprivate variable VD
// and register them with the runtime.
//
// Nothing is emitted (nullptr is returned) when native TLS is used, when VD
// has no definition, when VD was already processed, or when neither a
// constructor nor a destructor is needed.
//
// If CGF is null, the registration call is placed in a new function named
// with the "__omp_threadprivate_init_" prefix, and that function is returned.
// Otherwise the registration call is emitted into *CGF and nullptr is
// returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The set insertion doubles as the "already handled" check.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // presumably relies on PerformInit implying an initializer - confirm.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as the variable's memory type, keeping the
      // original variable's alignment.
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and return it as the function result.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in its own nullary
      // function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

// Emit and register the offload constructor/destructor entries for the
// declare target variable VD, whose global is Addr.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when there are no offload targets and this is not a device
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Variables that are not declare target, are 'link', or are 'to' under
  // unified shared memory get no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable only once; the set insertion is the check.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Buffer holds the common prefix; "_ctor"/"_dtor" are appended below.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced in the device branch below without a
  // null check; presumably PerformInit implies an initializer - confirm.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the variable VD (the global Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Address the variable through address space 0, casting if it lives in
      // a different one.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: no function is generated; a private one-byte constant
      // global stands in as both the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the storage of the
      // variable VD (the global Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side placeholder, mirroring the constructor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

// Return the address of a compiler-generated threadprivate variable of type
// VarType. Its global is named Name plus an "artificial" suffix. With native
// TLS the global itself is made thread-local; otherwise the per-thread copy
// is obtained through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      // The cache slot is a second internal global named
      // <Name><artificial suffix><cache suffix>.
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns an untyped pointer; cast it back to the variable's
  // memory type in address space 0.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

// Emit "if (Cond) ThenGen else ElseGen" for an OpenMP 'if' clause.
// When Cond constant-folds, only the selected arm is emitted and no branch is
// generated; otherwise both arms are emitted in omp_if.then / omp_if.else and
// control rejoins in omp_if.end.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
// NOTE(review): the object returned by CreateEmpty is discarded right away,
  // whereas other functions in this file bind it to a named local
  // (`auto NL = ...`). If ApplyDebugLocation restores the previous location
  // in its destructor, this statement has no lasting effect - confirm intent.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-temporary pattern as above.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

// Emit the invocation of an outlined 'parallel' region.
//
// Without an 'if' clause (IfCond == nullptr) this is a call to
// __kmpc_fork_call(loc, n, microtask, captured vars...). With an 'if' clause,
// the false arm runs the region serially: __kmpc_serialized_parallel, a
// direct call of OutlinedFn, then __kmpc_end_serialized_parallel.
// NumThreads is not referenced in this function body.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed arguments first, then the variadic list of captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // The bound thread id is passed as a pointer to a zero-initialised i32
    // temporary.
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect, but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid").
Otherwise, if we're not inside parallel region, but in 2159 // regular serial code region, get thread ID by calling kmp_int32 2160 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2161 // return the address of that temp. 2162 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2163 SourceLocation Loc) { 2164 if (auto *OMPRegionInfo = 2165 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2166 if (OMPRegionInfo->getThreadIDVariable()) 2167 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2168 2169 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2170 QualType Int32Ty = 2171 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2172 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2173 CGF.EmitStoreOfScalar(ThreadID, 2174 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2175 2176 return ThreadIDTemp; 2177 } 2178 2179 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2180 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2181 SmallString<256> Buffer; 2182 llvm::raw_svector_ostream Out(Buffer); 2183 Out << Name; 2184 StringRef RuntimeName = Out.str(); 2185 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2186 if (Elem.second) { 2187 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2188 "OMP internal variable has different type than requested"); 2189 return &*Elem.second; 2190 } 2191 2192 return Elem.second = new llvm::GlobalVariable( 2193 CGM.getModule(), Ty, /*IsConstant*/ false, 2194 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2195 Elem.first(), /*InsertBefore=*/nullptr, 2196 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2197 } 2198 2199 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2200 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2201 std::string Name = getName({Prefix, "var"}); 2202 return 
getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2203 } 2204 2205 namespace { 2206 /// Common pre(post)-action for different OpenMP constructs. 2207 class CommonActionTy final : public PrePostActionTy { 2208 llvm::FunctionCallee EnterCallee; 2209 ArrayRef<llvm::Value *> EnterArgs; 2210 llvm::FunctionCallee ExitCallee; 2211 ArrayRef<llvm::Value *> ExitArgs; 2212 bool Conditional; 2213 llvm::BasicBlock *ContBlock = nullptr; 2214 2215 public: 2216 CommonActionTy(llvm::FunctionCallee EnterCallee, 2217 ArrayRef<llvm::Value *> EnterArgs, 2218 llvm::FunctionCallee ExitCallee, 2219 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2220 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2221 ExitArgs(ExitArgs), Conditional(Conditional) {} 2222 void Enter(CodeGenFunction &CGF) override { 2223 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2224 if (Conditional) { 2225 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2226 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2227 ContBlock = CGF.createBasicBlock("omp_if.end"); 2228 // Generate the branch (If-stmt) 2229 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2230 CGF.EmitBlock(ThenBlock); 2231 } 2232 } 2233 void Done(CodeGenFunction &CGF) { 2234 // Emit the rest of blocks/branches 2235 CGF.EmitBranch(ContBlock); 2236 CGF.EmitBlock(ContBlock, true); 2237 } 2238 void Exit(CodeGenFunction &CGF) override { 2239 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2240 } 2241 }; 2242 } // anonymous namespace 2243 2244 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2245 StringRef CriticalName, 2246 const RegionCodeGenTy &CriticalOpGen, 2247 SourceLocation Loc, const Expr *Hint) { 2248 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2249 // CriticalOpGen(); 2250 // __kmpc_end_critical(ident_t *, gtid, Lock); 2251 // Prepare arguments and build a call to __kmpc_critical 2252 if (!CGF.HaveInsertPoint()) 2253 return; 2254 
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2255 getCriticalRegionLock(CriticalName)}; 2256 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2257 std::end(Args)); 2258 if (Hint) { 2259 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2260 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2261 } 2262 CommonActionTy Action( 2263 OMPBuilder.getOrCreateRuntimeFunction( 2264 CGM.getModule(), 2265 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2266 EnterArgs, 2267 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2268 OMPRTL___kmpc_end_critical), 2269 Args); 2270 CriticalOpGen.setAction(Action); 2271 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2272 } 2273 2274 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2275 const RegionCodeGenTy &MasterOpGen, 2276 SourceLocation Loc) { 2277 if (!CGF.HaveInsertPoint()) 2278 return; 2279 // if(__kmpc_master(ident_t *, gtid)) { 2280 // MasterOpGen(); 2281 // __kmpc_end_master(ident_t *, gtid); 2282 // } 2283 // Prepare arguments and build a call to __kmpc_master 2284 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2285 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2286 CGM.getModule(), OMPRTL___kmpc_master), 2287 Args, 2288 OMPBuilder.getOrCreateRuntimeFunction( 2289 CGM.getModule(), OMPRTL___kmpc_end_master), 2290 Args, 2291 /*Conditional=*/true); 2292 MasterOpGen.setAction(Action); 2293 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2294 Action.Done(CGF); 2295 } 2296 2297 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2298 const RegionCodeGenTy &MaskedOpGen, 2299 SourceLocation Loc, const Expr *Filter) { 2300 if (!CGF.HaveInsertPoint()) 2301 return; 2302 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2303 // MaskedOpGen(); 2304 // __kmpc_end_masked(iden_t *, gtid); 2305 // } 2306 // Prepare arguments and build a call to __kmpc_masked 2307 llvm::Value *FilterVal = 
Filter 2308 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2309 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2310 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2311 FilterVal}; 2312 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2313 getThreadID(CGF, Loc)}; 2314 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2315 CGM.getModule(), OMPRTL___kmpc_masked), 2316 Args, 2317 OMPBuilder.getOrCreateRuntimeFunction( 2318 CGM.getModule(), OMPRTL___kmpc_end_masked), 2319 ArgsEnd, 2320 /*Conditional=*/true); 2321 MaskedOpGen.setAction(Action); 2322 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2323 Action.Done(CGF); 2324 } 2325 2326 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2327 SourceLocation Loc) { 2328 if (!CGF.HaveInsertPoint()) 2329 return; 2330 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2331 OMPBuilder.createTaskyield(CGF.Builder); 2332 } else { 2333 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2334 llvm::Value *Args[] = { 2335 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2336 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2337 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2338 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2339 Args); 2340 } 2341 2342 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2343 Region->emitUntiedSwitch(CGF); 2344 } 2345 2346 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2347 const RegionCodeGenTy &TaskgroupOpGen, 2348 SourceLocation Loc) { 2349 if (!CGF.HaveInsertPoint()) 2350 return; 2351 // __kmpc_taskgroup(ident_t *, gtid); 2352 // TaskgroupOpGen(); 2353 // __kmpc_end_taskgroup(ident_t *, gtid); 2354 // Prepare arguments and build a call to __kmpc_taskgroup 2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2356 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2357 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2358 Args, 2359 
OMPBuilder.getOrCreateRuntimeFunction( 2360 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2361 Args); 2362 TaskgroupOpGen.setAction(Action); 2363 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2364 } 2365 2366 /// Given an array of pointers to variables, project the address of a 2367 /// given variable. 2368 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2369 unsigned Index, const VarDecl *Var) { 2370 // Pull out the pointer to the variable. 2371 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2372 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2373 2374 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2375 return Address( 2376 CGF.Builder.CreateBitCast( 2377 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2378 ElemTy, CGF.getContext().getDeclAlign(Var)); 2379 } 2380 2381 static llvm::Value *emitCopyprivateCopyFunction( 2382 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2383 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2384 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2385 SourceLocation Loc) { 2386 ASTContext &C = CGM.getContext(); 2387 // void copy_func(void *LHSArg, void *RHSArg); 2388 FunctionArgList Args; 2389 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2392 ImplicitParamDecl::Other); 2393 Args.push_back(&LHSArg); 2394 Args.push_back(&RHSArg); 2395 const auto &CGFI = 2396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2397 std::string Name = 2398 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2399 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2400 llvm::GlobalValue::InternalLinkage, Name, 2401 &CGM.getModule()); 2402 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2403 
Fn->setDoesNotRecurse(); 2404 CodeGenFunction CGF(CGM); 2405 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2406 // Dest = (void*[n])(LHSArg); 2407 // Src = (void*[n])(RHSArg); 2408 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2410 ArgsElemType->getPointerTo()), 2411 ArgsElemType, CGF.getPointerAlign()); 2412 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2413 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2414 ArgsElemType->getPointerTo()), 2415 ArgsElemType, CGF.getPointerAlign()); 2416 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2417 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2418 // ... 2419 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2420 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2421 const auto *DestVar = 2422 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2423 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2424 2425 const auto *SrcVar = 2426 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2427 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2428 2429 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2430 QualType Type = VD->getType(); 2431 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2432 } 2433 CGF.FinishFunction(); 2434 return Fn; 2435 } 2436 2437 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2438 const RegionCodeGenTy &SingleOpGen, 2439 SourceLocation Loc, 2440 ArrayRef<const Expr *> CopyprivateVars, 2441 ArrayRef<const Expr *> SrcExprs, 2442 ArrayRef<const Expr *> DstExprs, 2443 ArrayRef<const Expr *> AssignmentOps) { 2444 if (!CGF.HaveInsertPoint()) 2445 return; 2446 assert(CopyprivateVars.size() == SrcExprs.size() && 2447 CopyprivateVars.size() == DstExprs.size() && 2448 CopyprivateVars.size() == AssignmentOps.size()); 2449 ASTContext &C = CGM.getContext(); 2450 // int32 did_it = 0; 2451 // 
if(__kmpc_single(ident_t *, gtid)) { 2452 // SingleOpGen(); 2453 // __kmpc_end_single(ident_t *, gtid); 2454 // did_it = 1; 2455 // } 2456 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2457 // <copy_func>, did_it); 2458 2459 Address DidIt = Address::invalid(); 2460 if (!CopyprivateVars.empty()) { 2461 // int32 did_it = 0; 2462 QualType KmpInt32Ty = 2463 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2464 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2465 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2466 } 2467 // Prepare arguments and build a call to __kmpc_single 2468 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2469 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2470 CGM.getModule(), OMPRTL___kmpc_single), 2471 Args, 2472 OMPBuilder.getOrCreateRuntimeFunction( 2473 CGM.getModule(), OMPRTL___kmpc_end_single), 2474 Args, 2475 /*Conditional=*/true); 2476 SingleOpGen.setAction(Action); 2477 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2478 if (DidIt.isValid()) { 2479 // did_it = 1; 2480 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2481 } 2482 Action.Done(CGF); 2483 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2484 // <copy_func>, did_it); 2485 if (DidIt.isValid()) { 2486 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2487 QualType CopyprivateArrayTy = C.getConstantArrayType( 2488 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2489 /*IndexTypeQuals=*/0); 2490 // Create a list of all private variables for copyprivate. 
2491 Address CopyprivateList = 2492 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2493 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2494 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2495 CGF.Builder.CreateStore( 2496 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2497 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2498 CGF.VoidPtrTy), 2499 Elem); 2500 } 2501 // Build function that copies private values from single region to all other 2502 // threads in the corresponding parallel region. 2503 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2504 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2505 SrcExprs, DstExprs, AssignmentOps, Loc); 2506 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2507 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2508 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2509 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2510 llvm::Value *Args[] = { 2511 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2512 getThreadID(CGF, Loc), // i32 <gtid> 2513 BufSize, // size_t <buf_size> 2514 CL.getPointer(), // void *<copyprivate list> 2515 CpyFn, // void (*) (void *, void *) <copy_func> 2516 DidItVal // i32 did_it 2517 }; 2518 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2519 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2520 Args); 2521 } 2522 } 2523 2524 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2525 const RegionCodeGenTy &OrderedOpGen, 2526 SourceLocation Loc, bool IsThreads) { 2527 if (!CGF.HaveInsertPoint()) 2528 return; 2529 // __kmpc_ordered(ident_t *, gtid); 2530 // OrderedOpGen(); 2531 // __kmpc_end_ordered(ident_t *, gtid); 2532 // Prepare arguments and build a call to __kmpc_ordered 2533 if (IsThreads) { 2534 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2535 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2536 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2537 Args, 2538 OMPBuilder.getOrCreateRuntimeFunction( 2539 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2540 Args); 2541 OrderedOpGen.setAction(Action); 2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2543 return; 2544 } 2545 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2546 } 2547 2548 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2549 unsigned Flags; 2550 if (Kind == OMPD_for) 2551 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2552 else if (Kind == OMPD_sections) 2553 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2554 else if (Kind == OMPD_single) 2555 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2556 else if (Kind == OMPD_barrier) 2557 Flags = OMP_IDENT_BARRIER_EXPL; 2558 else 2559 Flags = OMP_IDENT_BARRIER_IMPL; 2560 return Flags; 2561 } 2562 2563 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2564 CodeGenFunction &CGF, const OMPLoopDirective &S, 2565 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2566 // Check if the loop directive is actually a doacross loop directive. In this 2567 // case choose static, 1 schedule. 2568 if (llvm::any_of( 2569 S.getClausesOfKind<OMPOrderedClause>(), 2570 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2571 ScheduleKind = OMPC_SCHEDULE_static; 2572 // Chunk size is 1 in this case. 
2573 llvm::APInt ChunkSize(32, 1); 2574 ChunkExpr = IntegerLiteral::Create( 2575 CGF.getContext(), ChunkSize, 2576 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2577 SourceLocation()); 2578 } 2579 } 2580 2581 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2582 OpenMPDirectiveKind Kind, bool EmitChecks, 2583 bool ForceSimpleCall) { 2584 // Check if we should use the OMPBuilder 2585 auto *OMPRegionInfo = 2586 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2587 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2588 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2589 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2590 return; 2591 } 2592 2593 if (!CGF.HaveInsertPoint()) 2594 return; 2595 // Build call __kmpc_cancel_barrier(loc, thread_id); 2596 // Build call __kmpc_barrier(loc, thread_id); 2597 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2598 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2599 // thread_id); 2600 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2601 getThreadID(CGF, Loc)}; 2602 if (OMPRegionInfo) { 2603 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2604 llvm::Value *Result = CGF.EmitRuntimeCall( 2605 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2606 OMPRTL___kmpc_cancel_barrier), 2607 Args); 2608 if (EmitChecks) { 2609 // if (__kmpc_cancel_barrier()) { 2610 // exit from construct; 2611 // } 2612 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2613 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2614 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2615 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2616 CGF.EmitBlock(ExitBB); 2617 // exit from construct; 2618 CodeGenFunction::JumpDest CancelDestination = 2619 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2620 CGF.EmitBranchThroughCleanup(CancelDestination); 2621 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2622 } 2623 
return; 2624 } 2625 } 2626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2627 CGM.getModule(), OMPRTL___kmpc_barrier), 2628 Args); 2629 } 2630 2631 /// Map the OpenMP loop schedule to the runtime enumeration. 2632 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2633 bool Chunked, bool Ordered) { 2634 switch (ScheduleKind) { 2635 case OMPC_SCHEDULE_static: 2636 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2637 : (Ordered ? OMP_ord_static : OMP_sch_static); 2638 case OMPC_SCHEDULE_dynamic: 2639 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2640 case OMPC_SCHEDULE_guided: 2641 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2642 case OMPC_SCHEDULE_runtime: 2643 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2644 case OMPC_SCHEDULE_auto: 2645 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2646 case OMPC_SCHEDULE_unknown: 2647 assert(!Chunked && "chunk was specified but schedule kind not known"); 2648 return Ordered ? OMP_ord_static : OMP_sch_static; 2649 } 2650 llvm_unreachable("Unexpected runtime schedule"); 2651 } 2652 2653 /// Map the OpenMP distribute schedule to the runtime enumeration. 2654 static OpenMPSchedType 2655 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2656 // only static is allowed for dist_schedule 2657 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2658 } 2659 2660 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2661 bool Chunked) const { 2662 OpenMPSchedType Schedule = 2663 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2664 return Schedule == OMP_sch_static; 2665 } 2666 2667 bool CGOpenMPRuntime::isStaticNonchunked( 2668 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2669 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2670 return Schedule == OMP_dist_sch_static; 2671 } 2672 2673 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2674 bool Chunked) const { 2675 OpenMPSchedType Schedule = 2676 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2677 return Schedule == OMP_sch_static_chunked; 2678 } 2679 2680 bool CGOpenMPRuntime::isStaticChunked( 2681 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2682 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2683 return Schedule == OMP_dist_sch_static_chunked; 2684 } 2685 2686 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2687 OpenMPSchedType Schedule = 2688 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2689 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2690 return Schedule != OMP_sch_static; 2691 } 2692 2693 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2694 OpenMPScheduleClauseModifier M1, 2695 OpenMPScheduleClauseModifier M2) { 2696 int Modifier = 0; 2697 switch (M1) { 2698 case OMPC_SCHEDULE_MODIFIER_monotonic: 2699 Modifier = OMP_sch_modifier_monotonic; 2700 break; 2701 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2702 Modifier = OMP_sch_modifier_nonmonotonic; 2703 break; 2704 case OMPC_SCHEDULE_MODIFIER_simd: 2705 if (Schedule == OMP_sch_static_chunked) 2706 Schedule = OMP_sch_static_balanced_chunked; 2707 break; 2708 case 
OMPC_SCHEDULE_MODIFIER_last: 2709 case OMPC_SCHEDULE_MODIFIER_unknown: 2710 break; 2711 } 2712 switch (M2) { 2713 case OMPC_SCHEDULE_MODIFIER_monotonic: 2714 Modifier = OMP_sch_modifier_monotonic; 2715 break; 2716 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2717 Modifier = OMP_sch_modifier_nonmonotonic; 2718 break; 2719 case OMPC_SCHEDULE_MODIFIER_simd: 2720 if (Schedule == OMP_sch_static_chunked) 2721 Schedule = OMP_sch_static_balanced_chunked; 2722 break; 2723 case OMPC_SCHEDULE_MODIFIER_last: 2724 case OMPC_SCHEDULE_MODIFIER_unknown: 2725 break; 2726 } 2727 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2728 // If the static schedule kind is specified or if the ordered clause is 2729 // specified, and if the nonmonotonic modifier is not specified, the effect is 2730 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2731 // modifier is specified, the effect is as if the nonmonotonic modifier is 2732 // specified. 2733 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2734 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2737 Schedule == OMP_dist_sch_static_chunked || 2738 Schedule == OMP_dist_sch_static)) 2739 Modifier = OMP_sch_modifier_nonmonotonic; 2740 } 2741 return Schedule | Modifier; 2742 } 2743 2744 void CGOpenMPRuntime::emitForDispatchInit( 2745 CodeGenFunction &CGF, SourceLocation Loc, 2746 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2747 bool Ordered, const DispatchRTInput &DispatchValues) { 2748 if (!CGF.HaveInsertPoint()) 2749 return; 2750 OpenMPSchedType Schedule = getRuntimeSchedule( 2751 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2752 assert(Ordered || 2753 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2754 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2755 
Schedule != OMP_sch_static_balanced_chunked)); 2756 // Call __kmpc_dispatch_init( 2757 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2758 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2759 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2760 2761 // If the Chunk was not specified in the clause - use default value 1. 2762 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2763 : CGF.Builder.getIntN(IVSize, 1); 2764 llvm::Value *Args[] = { 2765 emitUpdateLocation(CGF, Loc), 2766 getThreadID(CGF, Loc), 2767 CGF.Builder.getInt32(addMonoNonMonoModifier( 2768 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2769 DispatchValues.LB, // Lower 2770 DispatchValues.UB, // Upper 2771 CGF.Builder.getIntN(IVSize, 1), // Stride 2772 Chunk // Chunk 2773 }; 2774 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2775 } 2776 2777 static void emitForStaticInitCall( 2778 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2779 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2780 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2781 const CGOpenMPRuntime::StaticRTInput &Values) { 2782 if (!CGF.HaveInsertPoint()) 2783 return; 2784 2785 assert(!Values.Ordered); 2786 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2787 Schedule == OMP_sch_static_balanced_chunked || 2788 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2789 Schedule == OMP_dist_sch_static || 2790 Schedule == OMP_dist_sch_static_chunked); 2791 2792 // Call __kmpc_for_static_init( 2793 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2794 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2795 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2796 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2797 llvm::Value *Chunk = Values.Chunk; 2798 if (Chunk == nullptr) { 2799 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2800 Schedule == 
OMP_dist_sch_static) && 2801 "expected static non-chunked schedule"); 2802 // If the Chunk was not specified in the clause - use default value 1. 2803 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2804 } else { 2805 assert((Schedule == OMP_sch_static_chunked || 2806 Schedule == OMP_sch_static_balanced_chunked || 2807 Schedule == OMP_ord_static_chunked || 2808 Schedule == OMP_dist_sch_static_chunked) && 2809 "expected static chunked schedule"); 2810 } 2811 llvm::Value *Args[] = { 2812 UpdateLocation, 2813 ThreadId, 2814 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2815 M2)), // Schedule type 2816 Values.IL.getPointer(), // &isLastIter 2817 Values.LB.getPointer(), // &LB 2818 Values.UB.getPointer(), // &UB 2819 Values.ST.getPointer(), // &Stride 2820 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2821 Chunk // Chunk 2822 }; 2823 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2824 } 2825 2826 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2827 SourceLocation Loc, 2828 OpenMPDirectiveKind DKind, 2829 const OpenMPScheduleTy &ScheduleKind, 2830 const StaticRTInput &Values) { 2831 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2832 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2833 assert(isOpenMPWorksharingDirective(DKind) && 2834 "Expected loop-based or sections-based directive."); 2835 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2836 isOpenMPLoopDirective(DKind) 2837 ? 
OMP_IDENT_WORK_LOOP 2838 : OMP_IDENT_WORK_SECTIONS); 2839 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2840 llvm::FunctionCallee StaticInitFunction = 2841 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2842 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2843 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2844 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2845 } 2846 2847 void CGOpenMPRuntime::emitDistributeStaticInit( 2848 CodeGenFunction &CGF, SourceLocation Loc, 2849 OpenMPDistScheduleClauseKind SchedKind, 2850 const CGOpenMPRuntime::StaticRTInput &Values) { 2851 OpenMPSchedType ScheduleNum = 2852 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2853 llvm::Value *UpdatedLocation = 2854 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2855 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2856 llvm::FunctionCallee StaticInitFunction; 2857 bool isGPUDistribute = 2858 CGM.getLangOpts().OpenMPIsDevice && 2859 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2860 StaticInitFunction = createForStaticInitFunction( 2861 Values.IVSize, Values.IVSigned, isGPUDistribute); 2862 2863 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2864 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2865 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2866 } 2867 2868 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2869 SourceLocation Loc, 2870 OpenMPDirectiveKind DKind) { 2871 if (!CGF.HaveInsertPoint()) 2872 return; 2873 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2874 llvm::Value *Args[] = { 2875 emitUpdateLocation(CGF, Loc, 2876 isOpenMPDistributeDirective(DKind) 2877 ? OMP_IDENT_WORK_DISTRIBUTE 2878 : isOpenMPLoopDirective(DKind) 2879 ? 
OMP_IDENT_WORK_LOOP 2880 : OMP_IDENT_WORK_SECTIONS), 2881 getThreadID(CGF, Loc)}; 2882 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2883 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2884 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2885 CGF.EmitRuntimeCall( 2886 OMPBuilder.getOrCreateRuntimeFunction( 2887 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2888 Args); 2889 else 2890 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2891 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2892 Args); 2893 } 2894 2895 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2896 SourceLocation Loc, 2897 unsigned IVSize, 2898 bool IVSigned) { 2899 if (!CGF.HaveInsertPoint()) 2900 return; 2901 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2902 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2903 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2904 } 2905 2906 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2907 SourceLocation Loc, unsigned IVSize, 2908 bool IVSigned, Address IL, 2909 Address LB, Address UB, 2910 Address ST) { 2911 // Call __kmpc_dispatch_next( 2912 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2913 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2914 // kmp_int[32|64] *p_stride); 2915 llvm::Value *Args[] = { 2916 emitUpdateLocation(CGF, Loc), 2917 getThreadID(CGF, Loc), 2918 IL.getPointer(), // &isLastIter 2919 LB.getPointer(), // &Lower 2920 UB.getPointer(), // &Upper 2921 ST.getPointer() // &Stride 2922 }; 2923 llvm::Value *Call = 2924 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2925 return CGF.EmitScalarConversion( 2926 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2927 CGF.getContext().BoolTy, Loc); 2928 } 2929 2930 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2931 llvm::Value 
*NumThreads, 2932 SourceLocation Loc) { 2933 if (!CGF.HaveInsertPoint()) 2934 return; 2935 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2936 llvm::Value *Args[] = { 2937 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2938 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2939 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2940 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2941 Args); 2942 } 2943 2944 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2945 ProcBindKind ProcBind, 2946 SourceLocation Loc) { 2947 if (!CGF.HaveInsertPoint()) 2948 return; 2949 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2950 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2951 llvm::Value *Args[] = { 2952 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2953 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2954 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2955 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2956 Args); 2957 } 2958 2959 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2960 SourceLocation Loc, llvm::AtomicOrdering AO) { 2961 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2962 OMPBuilder.createFlush(CGF.Builder); 2963 } else { 2964 if (!CGF.HaveInsertPoint()) 2965 return; 2966 // Build call void __kmpc_flush(ident_t *loc) 2967 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2968 CGM.getModule(), OMPRTL___kmpc_flush), 2969 emitUpdateLocation(CGF, Loc)); 2970 } 2971 } 2972 2973 namespace { 2974 /// Indexes of fields for type kmp_task_t. 2975 enum KmpTaskTFields { 2976 /// List of shared variables. 2977 KmpTaskTShareds, 2978 /// Task routine. 2979 KmpTaskTRoutine, 2980 /// Partition id for the untied tasks. 2981 KmpTaskTPartId, 2982 /// Function with call of destructors for private variables. 2983 Data1, 2984 /// Task priority. 
2985 Data2, 2986 /// (Taskloops only) Lower bound. 2987 KmpTaskTLowerBound, 2988 /// (Taskloops only) Upper bound. 2989 KmpTaskTUpperBound, 2990 /// (Taskloops only) Stride. 2991 KmpTaskTStride, 2992 /// (Taskloops only) Is last iteration flag. 2993 KmpTaskTLastIter, 2994 /// (Taskloops only) Reduction data. 2995 KmpTaskTReductions, 2996 }; 2997 } // anonymous namespace 2998 2999 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3000 return OffloadEntriesTargetRegion.empty() && 3001 OffloadEntriesDeviceGlobalVar.empty(); 3002 } 3003 3004 /// Initialize target region entry. 3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3006 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3007 StringRef ParentName, unsigned LineNum, 3008 unsigned Order) { 3009 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3010 "only required for the device " 3011 "code generation."); 3012 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3013 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3014 OMPTargetRegionEntryTargetRegion); 3015 ++OffloadingEntriesNum; 3016 } 3017 3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3019 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3020 StringRef ParentName, unsigned LineNum, 3021 llvm::Constant *Addr, llvm::Constant *ID, 3022 OMPTargetRegionEntryKind Flags) { 3023 // If we are emitting code for a target, the entry is already initialized, 3024 // only has to be registered. 3025 if (CGM.getLangOpts().OpenMPIsDevice) { 3026 // This could happen if the device compilation is invoked standalone. 
3027 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3028 return; 3029 auto &Entry = 3030 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3031 Entry.setAddress(Addr); 3032 Entry.setID(ID); 3033 Entry.setFlags(Flags); 3034 } else { 3035 if (Flags == 3036 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3037 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3038 /*IgnoreAddressId*/ true)) 3039 return; 3040 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3041 "Target region entry already registered!"); 3042 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3043 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3044 ++OffloadingEntriesNum; 3045 } 3046 } 3047 3048 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3049 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3050 bool IgnoreAddressId) const { 3051 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3052 if (PerDevice == OffloadEntriesTargetRegion.end()) 3053 return false; 3054 auto PerFile = PerDevice->second.find(FileID); 3055 if (PerFile == PerDevice->second.end()) 3056 return false; 3057 auto PerParentName = PerFile->second.find(ParentName); 3058 if (PerParentName == PerFile->second.end()) 3059 return false; 3060 auto PerLine = PerParentName->second.find(LineNum); 3061 if (PerLine == PerParentName->second.end()) 3062 return false; 3063 // Fail if this entry is already registered. 3064 if (!IgnoreAddressId && 3065 (PerLine->second.getAddress() || PerLine->second.getID())) 3066 return false; 3067 return true; 3068 } 3069 3070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3071 const OffloadTargetRegionEntryInfoActTy &Action) { 3072 // Scan all target region entries and perform the provided action. 
3073 for (const auto &D : OffloadEntriesTargetRegion) 3074 for (const auto &F : D.second) 3075 for (const auto &P : F.second) 3076 for (const auto &L : P.second) 3077 Action(D.first, F.first, P.first(), L.first, L.second); 3078 } 3079 3080 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3081 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3082 OMPTargetGlobalVarEntryKind Flags, 3083 unsigned Order) { 3084 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3085 "only required for the device " 3086 "code generation."); 3087 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3088 ++OffloadingEntriesNum; 3089 } 3090 3091 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3092 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3093 CharUnits VarSize, 3094 OMPTargetGlobalVarEntryKind Flags, 3095 llvm::GlobalValue::LinkageTypes Linkage) { 3096 if (CGM.getLangOpts().OpenMPIsDevice) { 3097 // This could happen if the device compilation is invoked standalone. 
3098 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3099 return; 3100 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3101 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3102 if (Entry.getVarSize().isZero()) { 3103 Entry.setVarSize(VarSize); 3104 Entry.setLinkage(Linkage); 3105 } 3106 return; 3107 } 3108 Entry.setVarSize(VarSize); 3109 Entry.setLinkage(Linkage); 3110 Entry.setAddress(Addr); 3111 } else { 3112 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3113 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3114 assert(Entry.isValid() && Entry.getFlags() == Flags && 3115 "Entry not initialized!"); 3116 if (Entry.getVarSize().isZero()) { 3117 Entry.setVarSize(VarSize); 3118 Entry.setLinkage(Linkage); 3119 } 3120 return; 3121 } 3122 OffloadEntriesDeviceGlobalVar.try_emplace( 3123 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3124 ++OffloadingEntriesNum; 3125 } 3126 } 3127 3128 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3129 actOnDeviceGlobalVarEntriesInfo( 3130 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3131 // Scan all target region entries and perform the provided action. 3132 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3133 Action(E.getKey(), E.getValue()); 3134 } 3135 3136 void CGOpenMPRuntime::createOffloadEntry( 3137 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3138 llvm::GlobalValue::LinkageTypes Linkage) { 3139 StringRef Name = Addr->getName(); 3140 llvm::Module &M = CGM.getModule(); 3141 llvm::LLVMContext &C = M.getContext(); 3142 3143 // Create constant string with the name. 
3144 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3145 3146 std::string StringName = getName({"omp_offloading", "entry_name"}); 3147 auto *Str = new llvm::GlobalVariable( 3148 M, StrPtrInit->getType(), /*isConstant=*/true, 3149 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3150 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3151 3152 llvm::Constant *Data[] = { 3153 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3154 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3155 llvm::ConstantInt::get(CGM.SizeTy, Size), 3156 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3157 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3158 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3159 llvm::GlobalVariable *Entry = createGlobalStruct( 3160 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3161 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3162 3163 // The entry has to be created in the section the linker expects it to be. 3164 Entry->setSection("omp_offloading_entries"); 3165 } 3166 3167 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3168 // Emit the offloading entries and metadata so that the device codegen side 3169 // can easily figure out what to emit. The produced metadata looks like 3170 // this: 3171 // 3172 // !omp_offload.info = !{!1, ...} 3173 // 3174 // Right now we only generate metadata for function that contain target 3175 // regions. 3176 3177 // If we are in simd mode or there are no entries, we don't need to do 3178 // anything. 
3179 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3180 return; 3181 3182 llvm::Module &M = CGM.getModule(); 3183 llvm::LLVMContext &C = M.getContext(); 3184 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3185 SourceLocation, StringRef>, 3186 16> 3187 OrderedEntries(OffloadEntriesInfoManager.size()); 3188 llvm::SmallVector<StringRef, 16> ParentFunctions( 3189 OffloadEntriesInfoManager.size()); 3190 3191 // Auxiliary methods to create metadata values and strings. 3192 auto &&GetMDInt = [this](unsigned V) { 3193 return llvm::ConstantAsMetadata::get( 3194 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3195 }; 3196 3197 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3198 3199 // Create the offloading info metadata node. 3200 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3201 3202 // Create function that emits metadata for each target region entry; 3203 auto &&TargetRegionMetadataEmitter = 3204 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3205 &GetMDString]( 3206 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3207 unsigned Line, 3208 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3209 // Generate metadata for target regions. Each entry of this metadata 3210 // contains: 3211 // - Entry 0 -> Kind of this type of metadata (0). 3212 // - Entry 1 -> Device ID of the file where the entry was identified. 3213 // - Entry 2 -> File ID of the file where the entry was identified. 3214 // - Entry 3 -> Mangled name of the function where the entry was 3215 // identified. 3216 // - Entry 4 -> Line in the file where the entry was identified. 3217 // - Entry 5 -> Order the entry was created. 3218 // The first element of the metadata node is the kind. 
3219 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3220 GetMDInt(FileID), GetMDString(ParentName), 3221 GetMDInt(Line), GetMDInt(E.getOrder())}; 3222 3223 SourceLocation Loc; 3224 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3225 E = CGM.getContext().getSourceManager().fileinfo_end(); 3226 I != E; ++I) { 3227 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3228 I->getFirst()->getUniqueID().getFile() == FileID) { 3229 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3230 I->getFirst(), Line, 1); 3231 break; 3232 } 3233 } 3234 // Save this entry in the right position of the ordered entries array. 3235 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3236 ParentFunctions[E.getOrder()] = ParentName; 3237 3238 // Add metadata to the named metadata node. 3239 MD->addOperand(llvm::MDNode::get(C, Ops)); 3240 }; 3241 3242 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3243 TargetRegionMetadataEmitter); 3244 3245 // Create function that emits metadata for each device global variable entry; 3246 auto &&DeviceGlobalVarMetadataEmitter = 3247 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3248 MD](StringRef MangledName, 3249 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3250 &E) { 3251 // Generate metadata for global variables. Each entry of this metadata 3252 // contains: 3253 // - Entry 0 -> Kind of this type of metadata (1). 3254 // - Entry 1 -> Mangled name of the variable. 3255 // - Entry 2 -> Declare target kind. 3256 // - Entry 3 -> Order the entry was created. 3257 // The first element of the metadata node is the kind. 3258 llvm::Metadata *Ops[] = { 3259 GetMDInt(E.getKind()), GetMDString(MangledName), 3260 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3261 3262 // Save this entry in the right position of the ordered entries array. 
3263 OrderedEntries[E.getOrder()] = 3264 std::make_tuple(&E, SourceLocation(), MangledName); 3265 3266 // Add metadata to the named metadata node. 3267 MD->addOperand(llvm::MDNode::get(C, Ops)); 3268 }; 3269 3270 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3271 DeviceGlobalVarMetadataEmitter); 3272 3273 for (const auto &E : OrderedEntries) { 3274 assert(std::get<0>(E) && "All ordered entries must exist!"); 3275 if (const auto *CE = 3276 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3277 std::get<0>(E))) { 3278 if (!CE->getID() || !CE->getAddress()) { 3279 // Do not blame the entry if the parent funtion is not emitted. 3280 StringRef FnName = ParentFunctions[CE->getOrder()]; 3281 if (!CGM.GetGlobalValue(FnName)) 3282 continue; 3283 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3284 DiagnosticsEngine::Error, 3285 "Offloading entry for target region in %0 is incorrect: either the " 3286 "address or the ID is invalid."); 3287 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3288 continue; 3289 } 3290 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3291 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3292 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3293 OffloadEntryInfoDeviceGlobalVar>( 3294 std::get<0>(E))) { 3295 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3296 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3297 CE->getFlags()); 3298 switch (Flags) { 3299 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3300 if (CGM.getLangOpts().OpenMPIsDevice && 3301 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3302 continue; 3303 if (!CE->getAddress()) { 3304 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3305 DiagnosticsEngine::Error, "Offloading entry for declare target " 3306 "variable %0 is incorrect: the " 3307 "address is invalid."); 3308 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3309 
continue; 3310 } 3311 // The vaiable has no definition - no need to add the entry. 3312 if (CE->getVarSize().isZero()) 3313 continue; 3314 break; 3315 } 3316 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3317 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3318 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3319 "Declaret target link address is set."); 3320 if (CGM.getLangOpts().OpenMPIsDevice) 3321 continue; 3322 if (!CE->getAddress()) { 3323 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3324 DiagnosticsEngine::Error, 3325 "Offloading entry for declare target variable is incorrect: the " 3326 "address is invalid."); 3327 CGM.getDiags().Report(DiagID); 3328 continue; 3329 } 3330 break; 3331 } 3332 3333 // Hidden or internal symbols on the device are not externally visible. We 3334 // should not attempt to register them by creating an offloading entry. 3335 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) 3336 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) 3337 continue; 3338 3339 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3340 CE->getVarSize().getQuantity(), Flags, 3341 CE->getLinkage()); 3342 } else { 3343 llvm_unreachable("Unsupported entry kind."); 3344 } 3345 } 3346 } 3347 3348 /// Loads all the offload entries information from the host IR 3349 /// metadata. 3350 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3351 // If we are in target mode, load the metadata from the host IR. This code has 3352 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3353 3354 if (!CGM.getLangOpts().OpenMPIsDevice) 3355 return; 3356 3357 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3358 return; 3359 3360 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3361 if (auto EC = Buf.getError()) { 3362 CGM.getDiags().Report(diag::err_cannot_open_file) 3363 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3364 return; 3365 } 3366 3367 llvm::LLVMContext C; 3368 auto ME = expectedToErrorOrAndEmitErrors( 3369 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3370 3371 if (auto EC = ME.getError()) { 3372 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3373 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3374 CGM.getDiags().Report(DiagID) 3375 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3376 return; 3377 } 3378 3379 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3380 if (!MD) 3381 return; 3382 3383 for (llvm::MDNode *MN : MD->operands()) { 3384 auto &&GetMDInt = [MN](unsigned Idx) { 3385 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3386 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3387 }; 3388 3389 auto &&GetMDString = [MN](unsigned Idx) { 3390 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3391 return V->getString(); 3392 }; 3393 3394 switch (GetMDInt(0)) { 3395 default: 3396 llvm_unreachable("Unexpected metadata!"); 3397 break; 3398 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3399 OffloadingEntryInfoTargetRegion: 3400 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3401 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3402 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3403 /*Order=*/GetMDInt(5)); 3404 break; 3405 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3406 OffloadingEntryInfoDeviceGlobalVar: 3407 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3408 /*MangledName=*/GetMDString(1), 3409 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3410 /*Flags=*/GetMDInt(2)), 3411 /*Order=*/GetMDInt(3)); 3412 break; 3413 } 3414 } 3415 } 3416 3417 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3418 if (!KmpRoutineEntryPtrTy) { 3419 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3420 ASTContext &C = CGM.getContext(); 3421 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3422 FunctionProtoType::ExtProtoInfo EPI; 3423 KmpRoutineEntryPtrQTy = C.getPointerType( 3424 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3425 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3426 } 3427 } 3428 3429 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3430 // Make sure the type of the entry is already created. This is the type we 3431 // have to create: 3432 // struct __tgt_offload_entry{ 3433 // void *addr; // Pointer to the offload entry info. 3434 // // (function or global) 3435 // char *name; // Name of the function or global. 3436 // size_t size; // Size of the entry info (0 if it a function). 3437 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3438 // int32_t reserved; // Reserved, to use by the runtime library. 
3439 // }; 3440 if (TgtOffloadEntryQTy.isNull()) { 3441 ASTContext &C = CGM.getContext(); 3442 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3443 RD->startDefinition(); 3444 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3445 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3446 addFieldToRecordDecl(C, RD, C.getSizeType()); 3447 addFieldToRecordDecl( 3448 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3449 addFieldToRecordDecl( 3450 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3451 RD->completeDefinition(); 3452 RD->addAttr(PackedAttr::CreateImplicit(C)); 3453 TgtOffloadEntryQTy = C.getRecordType(RD); 3454 } 3455 return TgtOffloadEntryQTy; 3456 } 3457 3458 namespace { 3459 struct PrivateHelpersTy { 3460 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3461 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3462 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3463 PrivateElemInit(PrivateElemInit) {} 3464 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3465 const Expr *OriginalRef = nullptr; 3466 const VarDecl *Original = nullptr; 3467 const VarDecl *PrivateCopy = nullptr; 3468 const VarDecl *PrivateElemInit = nullptr; 3469 bool isLocalPrivate() const { 3470 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3471 } 3472 }; 3473 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3474 } // anonymous namespace 3475 3476 static bool isAllocatableDecl(const VarDecl *VD) { 3477 const VarDecl *CVD = VD->getCanonicalDecl(); 3478 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3479 return false; 3480 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3481 // Use the default allocation. 
3482 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3483 !AA->getAllocator()); 3484 } 3485 3486 static RecordDecl * 3487 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3488 if (!Privates.empty()) { 3489 ASTContext &C = CGM.getContext(); 3490 // Build struct .kmp_privates_t. { 3491 // /* private vars */ 3492 // }; 3493 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3494 RD->startDefinition(); 3495 for (const auto &Pair : Privates) { 3496 const VarDecl *VD = Pair.second.Original; 3497 QualType Type = VD->getType().getNonReferenceType(); 3498 // If the private variable is a local variable with lvalue ref type, 3499 // allocate the pointer instead of the pointee type. 3500 if (Pair.second.isLocalPrivate()) { 3501 if (VD->getType()->isLValueReferenceType()) 3502 Type = C.getPointerType(Type); 3503 if (isAllocatableDecl(VD)) 3504 Type = C.getPointerType(Type); 3505 } 3506 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3507 if (VD->hasAttrs()) { 3508 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3509 E(VD->getAttrs().end()); 3510 I != E; ++I) 3511 FD->addAttr(*I); 3512 } 3513 } 3514 RD->completeDefinition(); 3515 return RD; 3516 } 3517 return nullptr; 3518 } 3519 3520 static RecordDecl * 3521 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3522 QualType KmpInt32Ty, 3523 QualType KmpRoutineEntryPointerQTy) { 3524 ASTContext &C = CGM.getContext(); 3525 // Build struct kmp_task_t { 3526 // void * shareds; 3527 // kmp_routine_entry_t routine; 3528 // kmp_int32 part_id; 3529 // kmp_cmplrdata_t data1; 3530 // kmp_cmplrdata_t data2; 3531 // For taskloops additional fields: 3532 // kmp_uint64 lb; 3533 // kmp_uint64 ub; 3534 // kmp_int64 st; 3535 // kmp_int32 liter; 3536 // void * reductions; 3537 // }; 3538 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3539 UD->startDefinition(); 3540 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3541 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3542 UD->completeDefinition(); 3543 QualType KmpCmplrdataTy = C.getRecordType(UD); 3544 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3545 RD->startDefinition(); 3546 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3547 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3548 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3549 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3550 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3551 if (isOpenMPTaskLoopDirective(Kind)) { 3552 QualType KmpUInt64Ty = 3553 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3554 QualType KmpInt64Ty = 3555 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3556 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3557 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3558 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3559 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3560 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3561 } 3562 RD->completeDefinition(); 3563 return RD; 3564 } 3565 3566 static RecordDecl * 3567 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3568 ArrayRef<PrivateDataTy> Privates) { 3569 ASTContext &C = CGM.getContext(); 3570 // Build struct kmp_task_t_with_privates { 3571 // kmp_task_t task_data; 3572 // .kmp_privates_t. privates; 3573 // }; 3574 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3575 RD->startDefinition(); 3576 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3577 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3578 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3579 RD->completeDefinition(); 3580 return RD; 3581 } 3582 3583 /// Emit a proxy function which accepts kmp_task_t as the second 3584 /// argument. 
3585 /// \code 3586 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3587 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3588 /// For taskloops: 3589 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3590 /// tt->reductions, tt->shareds); 3591 /// return 0; 3592 /// } 3593 /// \endcode 3594 static llvm::Function * 3595 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3596 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3597 QualType KmpTaskTWithPrivatesPtrQTy, 3598 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3599 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3600 llvm::Value *TaskPrivatesMap) { 3601 ASTContext &C = CGM.getContext(); 3602 FunctionArgList Args; 3603 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3604 ImplicitParamDecl::Other); 3605 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3606 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3607 ImplicitParamDecl::Other); 3608 Args.push_back(&GtidArg); 3609 Args.push_back(&TaskTypeArg); 3610 const auto &TaskEntryFnInfo = 3611 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3612 llvm::FunctionType *TaskEntryTy = 3613 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3614 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3615 auto *TaskEntry = llvm::Function::Create( 3616 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3617 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3618 TaskEntry->setDoesNotRecurse(); 3619 CodeGenFunction CGF(CGM); 3620 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3621 Loc, Loc); 3622 3623 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3624 // tt, 3625 // For taskloops: 3626 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3627 // 
tt->task_data.shareds); 3628 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3629 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3630 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3631 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3632 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3633 const auto *KmpTaskTWithPrivatesQTyRD = 3634 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3635 LValue Base = 3636 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3637 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3638 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3639 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3640 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3641 3642 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3643 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3644 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3645 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3646 CGF.ConvertTypeForMem(SharedsPtrTy)); 3647 3648 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3649 llvm::Value *PrivatesParam; 3650 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3651 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3652 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3653 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3654 } else { 3655 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3656 } 3657 3658 llvm::Value *CommonArgs[] = { 3659 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3660 CGF.Builder 3661 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3662 CGF.VoidPtrTy, CGF.Int8Ty) 3663 .getPointer()}; 3664 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3665 std::end(CommonArgs)); 3666 if (isOpenMPTaskLoopDirective(Kind)) { 3667 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3668 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3669 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3670 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3671 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3672 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3673 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3674 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3675 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3676 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3677 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3678 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3679 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3680 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3681 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3682 CallArgs.push_back(LBParam); 3683 CallArgs.push_back(UBParam); 3684 CallArgs.push_back(StParam); 3685 CallArgs.push_back(LIParam); 3686 CallArgs.push_back(RParam); 3687 } 3688 CallArgs.push_back(SharedsParam); 3689 3690 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3691 CallArgs); 3692 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3693 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3694 CGF.FinishFunction(); 3695 return TaskEntry; 3696 } 3697 3698 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3699 SourceLocation Loc, 3700 QualType KmpInt32Ty, 3701 QualType KmpTaskTWithPrivatesPtrQTy, 3702 QualType KmpTaskTWithPrivatesQTy) { 3703 ASTContext &C = CGM.getContext(); 3704 FunctionArgList Args; 3705 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3706 ImplicitParamDecl::Other); 3707 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3708 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3709 
ImplicitParamDecl::Other); 3710 Args.push_back(&GtidArg); 3711 Args.push_back(&TaskTypeArg); 3712 const auto &DestructorFnInfo = 3713 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3714 llvm::FunctionType *DestructorFnTy = 3715 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3716 std::string Name = 3717 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3718 auto *DestructorFn = 3719 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3720 Name, &CGM.getModule()); 3721 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3722 DestructorFnInfo); 3723 DestructorFn->setDoesNotRecurse(); 3724 CodeGenFunction CGF(CGM); 3725 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3726 Args, Loc, Loc); 3727 3728 LValue Base = CGF.EmitLoadOfPointerLValue( 3729 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3730 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3731 const auto *KmpTaskTWithPrivatesQTyRD = 3732 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3733 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3734 Base = CGF.EmitLValueForField(Base, *FI); 3735 for (const auto *Field : 3736 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3737 if (QualType::DestructionKind DtorKind = 3738 Field->getType().isDestructedType()) { 3739 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3740 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3741 } 3742 } 3743 CGF.FinishFunction(); 3744 return DestructorFn; 3745 } 3746 3747 /// Emit a privates mapping function for correct handling of private and 3748 /// firstprivate variables. 3749 /// \code 3750 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3751 /// **noalias priv1,..., <tyn> **noalias privn) { 3752 /// *priv1 = &.privates.priv1; 3753 /// ...; 3754 /// *privn = &.privates.privn; 3755 /// } 3756 /// \endcode 3757 static llvm::Value * 3758 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3759 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3760 ArrayRef<PrivateDataTy> Privates) { 3761 ASTContext &C = CGM.getContext(); 3762 FunctionArgList Args; 3763 ImplicitParamDecl TaskPrivatesArg( 3764 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3765 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3766 ImplicitParamDecl::Other); 3767 Args.push_back(&TaskPrivatesArg); 3768 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3769 unsigned Counter = 1; 3770 for (const Expr *E : Data.PrivateVars) { 3771 Args.push_back(ImplicitParamDecl::Create( 3772 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3773 C.getPointerType(C.getPointerType(E->getType())) 3774 .withConst() 3775 .withRestrict(), 3776 ImplicitParamDecl::Other)); 3777 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3778 PrivateVarsPos[VD] = Counter; 3779 ++Counter; 3780 } 3781 for (const Expr *E : Data.FirstprivateVars) { 3782 Args.push_back(ImplicitParamDecl::Create( 3783 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3784 C.getPointerType(C.getPointerType(E->getType())) 3785 .withConst() 3786 .withRestrict(), 3787 ImplicitParamDecl::Other)); 3788 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3789 PrivateVarsPos[VD] = Counter; 3790 ++Counter; 3791 } 3792 for (const Expr *E : Data.LastprivateVars) { 3793 Args.push_back(ImplicitParamDecl::Create( 3794 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3795 C.getPointerType(C.getPointerType(E->getType())) 3796 .withConst() 3797 .withRestrict(), 3798 ImplicitParamDecl::Other)); 3799 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3800 PrivateVarsPos[VD] = Counter; 3801 ++Counter; 3802 } 3803 for (const VarDecl *VD : 
Data.PrivateLocals) { 3804 QualType Ty = VD->getType().getNonReferenceType(); 3805 if (VD->getType()->isLValueReferenceType()) 3806 Ty = C.getPointerType(Ty); 3807 if (isAllocatableDecl(VD)) 3808 Ty = C.getPointerType(Ty); 3809 Args.push_back(ImplicitParamDecl::Create( 3810 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3811 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3812 ImplicitParamDecl::Other)); 3813 PrivateVarsPos[VD] = Counter; 3814 ++Counter; 3815 } 3816 const auto &TaskPrivatesMapFnInfo = 3817 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3818 llvm::FunctionType *TaskPrivatesMapTy = 3819 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3820 std::string Name = 3821 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3822 auto *TaskPrivatesMap = llvm::Function::Create( 3823 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3824 &CGM.getModule()); 3825 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3826 TaskPrivatesMapFnInfo); 3827 if (CGM.getLangOpts().Optimize) { 3828 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3829 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3830 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3831 } 3832 CodeGenFunction CGF(CGM); 3833 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3834 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3835 3836 // *privi = &.privates.privi; 3837 LValue Base = CGF.EmitLoadOfPointerLValue( 3838 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3839 TaskPrivatesArg.getType()->castAs<PointerType>()); 3840 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3841 Counter = 0; 3842 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3843 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3844 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3845 LValue RefLVal = 3846 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3847 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3848 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3849 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3850 ++Counter; 3851 } 3852 CGF.FinishFunction(); 3853 return TaskPrivatesMap; 3854 } 3855 3856 /// Emit initialization for private variables in task-based directives. 3857 static void emitPrivatesInit(CodeGenFunction &CGF, 3858 const OMPExecutableDirective &D, 3859 Address KmpTaskSharedsPtr, LValue TDBase, 3860 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3861 QualType SharedsTy, QualType SharedsPtrTy, 3862 const OMPTaskDataTy &Data, 3863 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3864 ASTContext &C = CGF.getContext(); 3865 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3866 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3867 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3868 ? OMPD_taskloop 3869 : OMPD_task; 3870 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3871 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3872 LValue SrcBase; 3873 bool IsTargetTask = 3874 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3875 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3876 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3877 // PointersArray, SizesArray, and MappersArray. The original variables for 3878 // these arrays are not captured and we get their addresses explicitly. 
3879 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3880 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3881 SrcBase = CGF.MakeAddrLValue( 3882 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3883 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3884 CGF.ConvertTypeForMem(SharedsTy)), 3885 SharedsTy); 3886 } 3887 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3888 for (const PrivateDataTy &Pair : Privates) { 3889 // Do not initialize private locals. 3890 if (Pair.second.isLocalPrivate()) { 3891 ++FI; 3892 continue; 3893 } 3894 const VarDecl *VD = Pair.second.PrivateCopy; 3895 const Expr *Init = VD->getAnyInitializer(); 3896 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3897 !CGF.isTrivialInitializer(Init)))) { 3898 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3899 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3900 const VarDecl *OriginalVD = Pair.second.Original; 3901 // Check if the variable is the target-based BasePointersArray, 3902 // PointersArray, SizesArray, or MappersArray. 
3903 LValue SharedRefLValue; 3904 QualType Type = PrivateLValue.getType(); 3905 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3906 if (IsTargetTask && !SharedField) { 3907 assert(isa<ImplicitParamDecl>(OriginalVD) && 3908 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3909 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3910 ->getNumParams() == 0 && 3911 isa<TranslationUnitDecl>( 3912 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3913 ->getDeclContext()) && 3914 "Expected artificial target data variable."); 3915 SharedRefLValue = 3916 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3917 } else if (ForDup) { 3918 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3919 SharedRefLValue = CGF.MakeAddrLValue( 3920 SharedRefLValue.getAddress(CGF).withAlignment( 3921 C.getDeclAlign(OriginalVD)), 3922 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3923 SharedRefLValue.getTBAAInfo()); 3924 } else if (CGF.LambdaCaptureFields.count( 3925 Pair.second.Original->getCanonicalDecl()) > 0 || 3926 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3927 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3928 } else { 3929 // Processing for implicitly captured variables. 3930 InlinedOpenMPRegionRAII Region( 3931 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3932 /*HasCancel=*/false, /*NoInheritance=*/true); 3933 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3934 } 3935 if (Type->isArrayType()) { 3936 // Initialize firstprivate array. 3937 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3938 // Perform simple memcpy. 3939 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3940 } else { 3941 // Initialize firstprivate array using element-by-element 3942 // initialization. 
3943 CGF.EmitOMPAggregateAssign( 3944 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3945 Type, 3946 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3947 Address SrcElement) { 3948 // Clean up any temporaries needed by the initialization. 3949 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3950 InitScope.addPrivate(Elem, SrcElement); 3951 (void)InitScope.Privatize(); 3952 // Emit initialization for single element. 3953 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3954 CGF, &CapturesInfo); 3955 CGF.EmitAnyExprToMem(Init, DestElement, 3956 Init->getType().getQualifiers(), 3957 /*IsInitializer=*/false); 3958 }); 3959 } 3960 } else { 3961 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3962 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3963 (void)InitScope.Privatize(); 3964 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3965 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3966 /*capturedByInit=*/false); 3967 } 3968 } else { 3969 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3970 } 3971 } 3972 ++FI; 3973 } 3974 } 3975 3976 /// Check if duplication function is required for taskloops. 
3977 static bool checkInitIsRequired(CodeGenFunction &CGF, 3978 ArrayRef<PrivateDataTy> Privates) { 3979 bool InitRequired = false; 3980 for (const PrivateDataTy &Pair : Privates) { 3981 if (Pair.second.isLocalPrivate()) 3982 continue; 3983 const VarDecl *VD = Pair.second.PrivateCopy; 3984 const Expr *Init = VD->getAnyInitializer(); 3985 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3986 !CGF.isTrivialInitializer(Init)); 3987 if (InitRequired) 3988 break; 3989 } 3990 return InitRequired; 3991 } 3992 3993 3994 /// Emit task_dup function (for initialization of 3995 /// private/firstprivate/lastprivate vars and last_iter flag) 3996 /// \code 3997 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3998 /// lastpriv) { 3999 /// // setup lastprivate flag 4000 /// task_dst->last = lastpriv; 4001 /// // could be constructor calls here... 4002 /// } 4003 /// \endcode 4004 static llvm::Value * 4005 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4006 const OMPExecutableDirective &D, 4007 QualType KmpTaskTWithPrivatesPtrQTy, 4008 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4009 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4010 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4011 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4012 ASTContext &C = CGM.getContext(); 4013 FunctionArgList Args; 4014 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4015 KmpTaskTWithPrivatesPtrQTy, 4016 ImplicitParamDecl::Other); 4017 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4018 KmpTaskTWithPrivatesPtrQTy, 4019 ImplicitParamDecl::Other); 4020 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4021 ImplicitParamDecl::Other); 4022 Args.push_back(&DstArg); 4023 Args.push_back(&SrcArg); 4024 Args.push_back(&LastprivArg); 4025 const auto &TaskDupFnInfo = 4026 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4027 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4028 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4029 auto *TaskDup = llvm::Function::Create( 4030 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4031 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4032 TaskDup->setDoesNotRecurse(); 4033 CodeGenFunction CGF(CGM); 4034 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4035 Loc); 4036 4037 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4038 CGF.GetAddrOfLocalVar(&DstArg), 4039 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4040 // task_dst->liter = lastpriv; 4041 if (WithLastIter) { 4042 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4043 LValue Base = CGF.EmitLValueForField( 4044 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4045 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4046 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4047 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4048 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4049 } 4050 4051 // Emit initial values for private copies (if any). 
4052 assert(!Privates.empty()); 4053 Address KmpTaskSharedsPtr = Address::invalid(); 4054 if (!Data.FirstprivateVars.empty()) { 4055 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4056 CGF.GetAddrOfLocalVar(&SrcArg), 4057 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4058 LValue Base = CGF.EmitLValueForField( 4059 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4060 KmpTaskSharedsPtr = Address( 4061 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4062 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4063 KmpTaskTShareds)), 4064 Loc), 4065 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 4066 } 4067 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4068 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4069 CGF.FinishFunction(); 4070 return TaskDup; 4071 } 4072 4073 /// Checks if destructor function is required to be generated. 4074 /// \return true if cleanups are required, false otherwise. 4075 static bool 4076 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4077 ArrayRef<PrivateDataTy> Privates) { 4078 for (const PrivateDataTy &P : Privates) { 4079 if (P.second.isLocalPrivate()) 4080 continue; 4081 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4082 if (Ty.isDestructedType()) 4083 return true; 4084 } 4085 return false; 4086 } 4087 4088 namespace { 4089 /// Loop generator for OpenMP iterator expression. 
/// The constructor privatizes every iterator variable together with its helper
/// counter and then opens one "iter.cont"/"iter.body" loop header per iterator,
/// in declaration order, so the loops nest. The destructor closes the loops in
/// reverse order by emitting the counter update, the branch back to
/// "iter.cont" and the "iter.exit" block. When constructed with a null
/// iterator expression both the constructor and the destructor emit nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, indexed like the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate all upper bounds and register private storage for
    // the iterator variables and their counters.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Second pass: emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost first (reverse of the order they were opened).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size of the storage designated by \p E.
/// - For an array shaping expression the address is the value of the base
///   pointer and the size is the size of the pointee type multiplied by every
///   dimension.
/// - For an array section the address is that of the expression's lvalue and
///   the size is the distance from it to one element past the address emitted
///   for the section with IsLowerBound=false.
/// - Otherwise the address is that of the expression's lvalue and the size is
///   the size of its type.
/// \return pair of (address, size) values.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions are converted to size_t before being multiplied in.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // Step one element past the upper-bound element to get the end address.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet. The record has
/// three fields, in order: an intptr-typed field, a size_t-typed field and a
/// 32-bit unsigned flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the allocation and initialization of a task descriptor for \p D.
/// In order, this: collects the private, firstprivate, lastprivate and local
/// private variables and sorts them by decreasing alignment; builds the
/// kmp_task_t based record types; emits the privates mapping function and the
/// proxy task entry; calls the task allocation runtime entry; handles the
/// 'detach' and 'affinity' clauses; copies the shareds into the task; emits
/// initialization of the private copies (and a task duplication function for
/// taskloops when needed); and stores the destructors function and priority
/// into the task descriptor.
/// \return the allocated task, the proxy task entry, the task pointer cast to
/// the "with privates" type, the kmp_task_t lvalue, its record declaration and
/// (optionally) the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached record
  // types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if there are any privates); it is
  // cast to the type of the task function's parameter at index 3. Without
  // privates a null pointer of that type is used instead.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in Flags; the 'final' flag may
  // depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select the flag at run time; otherwise use
  // its integer part as a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target task allocation entry is used; it takes
  // the device ID as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // The runtime result is converted from void* to the type of the event
    // handler expression before being stored.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements holds the runtime part contributed by clauses with an
    // iterator modifier (null if there is none); NumAffinities counts the
    // list items of clauses without a modifier.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // The element count is only known at run time: emit a variable-length
      // array whose size expression is bound to the computed value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // The element count is a compile-time constant: use a fixed-size
      // temporary array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced inside iterator loops are indexed by a runtime
    // counter that starts right after the statically placed elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object emits the loop nest around everything emitted until
      // the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++i, stored back so the next element (or loop iteration) sees it.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-specific "with privates" record; its
  // first field is the kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally get a duplication function when there are
    // lastprivates or when some private copy needs non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
/// These values are stored into the flags field of kmp_depend_info records.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// Only in, out, inout, mutexinoutset and inoutset are translated; any other
/// kind is treated as unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = DepInOutSet;
    break;
  // These kinds are not expected to reach this translation.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \param KmpDependInfoTy [in,out] Cached record type; built only when null.
/// The record has an intptr-typed field, a size_t-typed field and a flags
/// field, in this order.
/// \param FlagsTy [out] Always set to the unsigned integer type that has the
/// same bit width as bool.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the kmp_depend_info pointer stored in the depobj variable
/// \p DepobjLVal and reads the number of dependencies from the base_addr field
/// of the record located one element before the pointed-to array.
/// \return pair of (number of dependencies, lvalue of the record the loaded
/// pointer points to).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret the depobj storage as holding a kmp_depend_info pointer and
  // load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back to the record at index -1.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits stores that fill one kmp_depend_info record in \p DependenciesArray
/// for every expression in Data.DepExprs.
/// \param Pos Either a pointer to a compile-time index, which is used directly
/// and incremented in place, or a pointer to an lvalue holding a runtime index,
/// which is loaded, used and incremented by the emitted code.
/// If Data.IteratorExpr is set, the stores are emitted inside the loop nest
/// generated for that iterator expression.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // With a null iterator expression this scope emits nothing.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Address the record to fill: constant index or runtime index.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: at compile time for a constant index, in emitted
    // code for a runtime index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For every depobj expression in Data.DepExprs, emits code that stores the
/// number of dependencies recorded in that depobj into a fresh temporary
/// (inside the iterator loop nest, if Data.IteratorExpr is set), then loads
/// each temporary after the loop nest has been closed.
/// \return the loaded values, one per expression in Data.DepExprs.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // The nested block bounds the lifetime of the iterator scope, so the
    // loops are closed before the sizes are read back below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // size = 0; size += NumDeps;
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Emits code that copies the dependency records of every depobj in
/// Data.DepExprs into \p DependenciesArray, starting at the runtime position
/// held in \p PosLVal, and advances that position by the number of records
/// copied. The copies are emitted inside the iterator loop nest if
/// Data.IteratorExpr is set.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcpy dependency data.
      // Byte count = record size * number of records in this depobj.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4832 }); 4833 QualType FlagsTy; 4834 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4835 bool HasDepobjDeps = false; 4836 bool HasRegularWithIterators = false; 4837 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4838 llvm::Value *NumOfRegularWithIterators = 4839 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4840 // Calculate number of depobj dependecies and regular deps with the iterators. 4841 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4842 if (D.DepKind == OMPC_DEPEND_depobj) { 4843 SmallVector<llvm::Value *, 4> Sizes = 4844 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4845 for (llvm::Value *Size : Sizes) { 4846 NumOfDepobjElements = 4847 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4848 } 4849 HasDepobjDeps = true; 4850 continue; 4851 } 4852 // Include number of iterations, if any. 4853 4854 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4855 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4856 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4857 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4858 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4859 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4860 NumOfRegularWithIterators = 4861 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4862 } 4863 HasRegularWithIterators = true; 4864 continue; 4865 } 4866 } 4867 4868 QualType KmpDependInfoArrayTy; 4869 if (HasDepobjDeps || HasRegularWithIterators) { 4870 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4871 /*isSigned=*/false); 4872 if (HasDepobjDeps) { 4873 NumOfElements = 4874 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4875 } 4876 if (HasRegularWithIterators) { 4877 NumOfElements = 4878 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4879 } 4880 auto *OVE = new (C) OpaqueValueExpr( 4881 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4882 VK_PRValue); 4883 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4884 RValue::get(NumOfElements)); 4885 KmpDependInfoArrayTy = 4886 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4887 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4888 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4889 // Properly emit variable-sized array. 4890 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4891 ImplicitParamDecl::Other); 4892 CGF.EmitVarDecl(*PD); 4893 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4894 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4895 /*isSigned=*/false); 4896 } else { 4897 KmpDependInfoArrayTy = C.getConstantArrayType( 4898 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4899 ArrayType::Normal, /*IndexTypeQuals=*/0); 4900 DependenciesArray = 4901 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4902 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4903 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4904 /*isSigned=*/false); 4905 } 4906 unsigned Pos = 0; 4907 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4908 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4909 Dependencies[I].IteratorExpr) 4910 continue; 4911 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4912 DependenciesArray); 4913 } 4914 // Copy regular dependecies with iterators. 
4915 LValue PosLVal = CGF.MakeAddrLValue( 4916 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4917 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4918 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4919 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4920 !Dependencies[I].IteratorExpr) 4921 continue; 4922 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4923 DependenciesArray); 4924 } 4925 // Copy final depobj arrays without iterators. 4926 if (HasDepobjDeps) { 4927 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4928 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4929 continue; 4930 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4931 DependenciesArray); 4932 } 4933 } 4934 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4935 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4936 return std::make_pair(NumOfElements, DependenciesArray); 4937 } 4938 4939 Address CGOpenMPRuntime::emitDepobjDependClause( 4940 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4941 SourceLocation Loc) { 4942 if (Dependencies.DepExprs.empty()) 4943 return Address::invalid(); 4944 // Process list of dependencies. 4945 ASTContext &C = CGM.getContext(); 4946 Address DependenciesArray = Address::invalid(); 4947 unsigned NumDependencies = Dependencies.DepExprs.size(); 4948 QualType FlagsTy; 4949 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4950 RecordDecl *KmpDependInfoRD = 4951 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4952 4953 llvm::Value *Size; 4954 // Define type kmp_depend_info[<Dependencies.size()>]; 4955 // For depobj reserve one extra element to store the number of elements. 4956 // It is required to handle depobj(x) update(in) construct. 
4957 // kmp_depend_info[<Dependencies.size()>] deps; 4958 llvm::Value *NumDepsVal; 4959 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4960 if (const auto *IE = 4961 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4962 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4963 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4964 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4965 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4966 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4967 } 4968 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4969 NumDepsVal); 4970 CharUnits SizeInBytes = 4971 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4972 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4973 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4974 NumDepsVal = 4975 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4976 } else { 4977 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4978 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4979 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4980 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4981 Size = CGM.getSize(Sz.alignTo(Align)); 4982 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4983 } 4984 // Need to allocate on the dynamic memory. 4985 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4986 // Use default allocator. 
4987 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4988 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4989 4990 llvm::Value *Addr = 4991 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4992 CGM.getModule(), OMPRTL___kmpc_alloc), 4993 Args, ".dep.arr.addr"); 4994 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4995 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4996 Addr, KmpDependInfoLlvmTy->getPointerTo()); 4997 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4998 // Write number of elements in the first element of array for depobj. 4999 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5000 // deps[i].base_addr = NumDependencies; 5001 LValue BaseAddrLVal = CGF.EmitLValueForField( 5002 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5003 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5004 llvm::PointerUnion<unsigned *, LValue *> Pos; 5005 unsigned Idx = 1; 5006 LValue PosLVal; 5007 if (Dependencies.IteratorExpr) { 5008 PosLVal = CGF.MakeAddrLValue( 5009 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5010 C.getSizeType()); 5011 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5012 /*IsInit=*/true); 5013 Pos = &PosLVal; 5014 } else { 5015 Pos = &Idx; 5016 } 5017 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5018 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5019 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 5020 CGF.Int8Ty); 5021 return DependenciesArray; 5022 } 5023 5024 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5025 SourceLocation Loc) { 5026 ASTContext &C = CGM.getContext(); 5027 QualType FlagsTy; 5028 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5029 LValue Base = CGF.EmitLoadOfPointerLValue( 5030 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>()); 5031 QualType KmpDependInfoPtrTy 
= C.getPointerType(KmpDependInfoTy); 5032 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5033 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 5034 CGF.ConvertTypeForMem(KmpDependInfoTy)); 5035 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5036 Addr.getElementType(), Addr.getPointer(), 5037 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5038 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5039 CGF.VoidPtrTy); 5040 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5041 // Use default allocator. 5042 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5043 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5044 5045 // _kmpc_free(gtid, addr, nullptr); 5046 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5047 CGM.getModule(), OMPRTL___kmpc_free), 5048 Args); 5049 } 5050 5051 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5052 OpenMPDependClauseKind NewDepKind, 5053 SourceLocation Loc) { 5054 ASTContext &C = CGM.getContext(); 5055 QualType FlagsTy; 5056 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5057 RecordDecl *KmpDependInfoRD = 5058 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5059 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5060 llvm::Value *NumDeps; 5061 LValue Base; 5062 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5063 5064 Address Begin = Base.getAddress(CGF); 5065 // Cast from pointer to array type to pointer to single element. 5066 llvm::Value *End = CGF.Builder.CreateGEP( 5067 Begin.getElementType(), Begin.getPointer(), NumDeps); 5068 // The basic structure here is a while-do loop. 
5069 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5070 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5071 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5072 CGF.EmitBlock(BodyBB); 5073 llvm::PHINode *ElementPHI = 5074 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5075 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5076 Begin = Begin.withPointer(ElementPHI); 5077 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5078 Base.getTBAAInfo()); 5079 // deps[i].flags = NewDepKind; 5080 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5081 LValue FlagsLVal = CGF.EmitLValueForField( 5082 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5083 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5084 FlagsLVal); 5085 5086 // Shift the address forward by one element. 5087 Address ElementNext = 5088 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5089 ElementPHI->addIncoming(ElementNext.getPointer(), 5090 CGF.Builder.GetInsertBlock()); 5091 llvm::Value *IsEmpty = 5092 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5093 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5094 // Done. 
5095 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5096 } 5097 5098 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5099 const OMPExecutableDirective &D, 5100 llvm::Function *TaskFunction, 5101 QualType SharedsTy, Address Shareds, 5102 const Expr *IfCond, 5103 const OMPTaskDataTy &Data) { 5104 if (!CGF.HaveInsertPoint()) 5105 return; 5106 5107 TaskResultTy Result = 5108 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5109 llvm::Value *NewTask = Result.NewTask; 5110 llvm::Function *TaskEntry = Result.TaskEntry; 5111 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5112 LValue TDBase = Result.TDBase; 5113 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5114 // Process list of dependences. 5115 Address DependenciesArray = Address::invalid(); 5116 llvm::Value *NumOfElements; 5117 std::tie(NumOfElements, DependenciesArray) = 5118 emitDependClause(CGF, Data.Dependences, Loc); 5119 5120 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5121 // libcall. 
5122 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5123 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5124 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5125 // list is not empty 5126 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5127 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5128 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5129 llvm::Value *DepTaskArgs[7]; 5130 if (!Data.Dependences.empty()) { 5131 DepTaskArgs[0] = UpLoc; 5132 DepTaskArgs[1] = ThreadID; 5133 DepTaskArgs[2] = NewTask; 5134 DepTaskArgs[3] = NumOfElements; 5135 DepTaskArgs[4] = DependenciesArray.getPointer(); 5136 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5137 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5138 } 5139 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5140 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5141 if (!Data.Tied) { 5142 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5143 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5144 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5145 } 5146 if (!Data.Dependences.empty()) { 5147 CGF.EmitRuntimeCall( 5148 OMPBuilder.getOrCreateRuntimeFunction( 5149 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5150 DepTaskArgs); 5151 } else { 5152 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5153 CGM.getModule(), OMPRTL___kmpc_omp_task), 5154 TaskArgs); 5155 } 5156 // Check if parent region is untied and build return for untied task; 5157 if (auto *Region = 5158 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5159 Region->emitUntiedSwitch(CGF); 5160 }; 5161 5162 llvm::Value *DepWaitTaskArgs[6]; 5163 if (!Data.Dependences.empty()) { 5164 DepWaitTaskArgs[0] = UpLoc; 5165 DepWaitTaskArgs[1] = ThreadID; 5166 DepWaitTaskArgs[2] = NumOfElements; 5167 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5168 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5169 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5170 } 5171 auto &M = CGM.getModule(); 5172 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5173 TaskEntry, &Data, &DepWaitTaskArgs, 5174 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5175 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5176 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5177 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5178 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5179 // is specified. 5180 if (!Data.Dependences.empty()) 5181 CGF.EmitRuntimeCall( 5182 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5183 DepWaitTaskArgs); 5184 // Call proxy_task_entry(gtid, new_task); 5185 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5186 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5187 Action.Enter(CGF); 5188 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5189 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5190 OutlinedFnArgs); 5191 }; 5192 5193 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5194 // kmp_task_t *new_task); 5195 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5196 // kmp_task_t *new_task); 5197 RegionCodeGenTy RCG(CodeGen); 5198 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5199 M, OMPRTL___kmpc_omp_task_begin_if0), 5200 TaskArgs, 5201 OMPBuilder.getOrCreateRuntimeFunction( 5202 M, OMPRTL___kmpc_omp_task_complete_if0), 5203 TaskArgs); 5204 RCG.setAction(Action); 5205 RCG(CGF); 5206 }; 5207 5208 if (IfCond) { 5209 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5210 } else { 5211 RegionCodeGenTy ThenRCG(ThenCodeGen); 5212 ThenRCG(CGF); 5213 } 5214 } 5215 5216 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5217 const OMPLoopDirective &D, 5218 llvm::Function *TaskFunction, 5219 
QualType SharedsTy, Address Shareds, 5220 const Expr *IfCond, 5221 const OMPTaskDataTy &Data) { 5222 if (!CGF.HaveInsertPoint()) 5223 return; 5224 TaskResultTy Result = 5225 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5226 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5227 // libcall. 5228 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5229 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5230 // sched, kmp_uint64 grainsize, void *task_dup); 5231 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5232 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5233 llvm::Value *IfVal; 5234 if (IfCond) { 5235 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5236 /*isSigned=*/true); 5237 } else { 5238 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5239 } 5240 5241 LValue LBLVal = CGF.EmitLValueForField( 5242 Result.TDBase, 5243 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5244 const auto *LBVar = 5245 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5246 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5247 LBLVal.getQuals(), 5248 /*IsInitializer=*/true); 5249 LValue UBLVal = CGF.EmitLValueForField( 5250 Result.TDBase, 5251 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5252 const auto *UBVar = 5253 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5254 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5255 UBLVal.getQuals(), 5256 /*IsInitializer=*/true); 5257 LValue StLVal = CGF.EmitLValueForField( 5258 Result.TDBase, 5259 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5260 const auto *StVar = 5261 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5262 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5263 StLVal.getQuals(), 5264 /*IsInitializer=*/true); 5265 // 
Store reductions address. 5266 LValue RedLVal = CGF.EmitLValueForField( 5267 Result.TDBase, 5268 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5269 if (Data.Reductions) { 5270 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5271 } else { 5272 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5273 CGF.getContext().VoidPtrTy); 5274 } 5275 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5276 llvm::Value *TaskArgs[] = { 5277 UpLoc, 5278 ThreadID, 5279 Result.NewTask, 5280 IfVal, 5281 LBLVal.getPointer(CGF), 5282 UBLVal.getPointer(CGF), 5283 CGF.EmitLoadOfScalar(StLVal, Loc), 5284 llvm::ConstantInt::getSigned( 5285 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5286 llvm::ConstantInt::getSigned( 5287 CGF.IntTy, Data.Schedule.getPointer() 5288 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5289 : NoSchedule), 5290 Data.Schedule.getPointer() 5291 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5292 /*isSigned=*/false) 5293 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5294 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5295 Result.TaskDupFn, CGF.VoidPtrTy) 5296 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5297 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5298 CGM.getModule(), OMPRTL___kmpc_taskloop), 5299 TaskArgs); 5300 } 5301 5302 /// Emit reduction operation for each element of array (required for 5303 /// array sections) LHS op = RHS. 5304 /// \param Type Type of array. 5305 /// \param LHSVar Variable on the left side of the reduction operation 5306 /// (references element of array in original variable). 5307 /// \param RHSVar Variable on the right side of the reduction operation 5308 /// (references element of array in original variable). 5309 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5310 /// RHSVar. 
5311 static void EmitOMPAggregateReduction( 5312 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5313 const VarDecl *RHSVar, 5314 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5315 const Expr *, const Expr *)> &RedOpGen, 5316 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5317 const Expr *UpExpr = nullptr) { 5318 // Perform element-by-element initialization. 5319 QualType ElementTy; 5320 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5321 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5322 5323 // Drill down to the base element type on both arrays. 5324 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5325 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5326 5327 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5328 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5329 // Cast from pointer to array type to pointer to single element. 5330 llvm::Value *LHSEnd = 5331 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5332 // The basic structure here is a while-do loop. 5333 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5334 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5335 llvm::Value *IsEmpty = 5336 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5337 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5338 5339 // Enter the loop body, making that address the current address. 
5340 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5341 CGF.EmitBlock(BodyBB); 5342 5343 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5344 5345 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5346 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5347 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5348 Address RHSElementCurrent( 5349 RHSElementPHI, RHSAddr.getElementType(), 5350 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5351 5352 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5353 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5354 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5355 Address LHSElementCurrent( 5356 LHSElementPHI, LHSAddr.getElementType(), 5357 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5358 5359 // Emit copy. 5360 CodeGenFunction::OMPPrivateScope Scope(CGF); 5361 Scope.addPrivate(LHSVar, LHSElementCurrent); 5362 Scope.addPrivate(RHSVar, RHSElementCurrent); 5363 Scope.Privatize(); 5364 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5365 Scope.ForceCleanup(); 5366 5367 // Shift the address forward by one element. 5368 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5369 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5370 "omp.arraycpy.dest.element"); 5371 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5372 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5373 "omp.arraycpy.src.element"); 5374 // Check whether we've reached the end. 5375 llvm::Value *Done = 5376 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5377 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5378 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5379 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5380 5381 // Done. 5382 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5383 } 5384 5385 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5386 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5387 /// UDR combiner function. 5388 static void emitReductionCombiner(CodeGenFunction &CGF, 5389 const Expr *ReductionOp) { 5390 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5391 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5392 if (const auto *DRE = 5393 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5394 if (const auto *DRD = 5395 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5396 std::pair<llvm::Function *, llvm::Function *> Reduction = 5397 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5398 RValue Func = RValue::get(Reduction.first); 5399 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5400 CGF.EmitIgnoredExpr(ReductionOp); 5401 return; 5402 } 5403 CGF.EmitIgnoredExpr(ReductionOp); 5404 } 5405 5406 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5407 SourceLocation Loc, llvm::Type *ArgsElemType, 5408 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5409 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5410 ASTContext &C = CGM.getContext(); 5411 5412 // void reduction_func(void *LHSArg, void *RHSArg); 5413 FunctionArgList Args; 5414 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5415 ImplicitParamDecl::Other); 5416 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5417 ImplicitParamDecl::Other); 5418 Args.push_back(&LHSArg); 5419 Args.push_back(&RHSArg); 5420 const auto &CGFI = 5421 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5422 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5423 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5424 llvm::GlobalValue::InternalLinkage, Name, 5425 &CGM.getModule()); 5426 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5427 
Fn->setDoesNotRecurse(); 5428 CodeGenFunction CGF(CGM); 5429 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5430 5431 // Dst = (void*[n])(LHSArg); 5432 // Src = (void*[n])(RHSArg); 5433 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5434 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5435 ArgsElemType->getPointerTo()), 5436 ArgsElemType, CGF.getPointerAlign()); 5437 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5438 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5439 ArgsElemType->getPointerTo()), 5440 ArgsElemType, CGF.getPointerAlign()); 5441 5442 // ... 5443 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5444 // ... 5445 CodeGenFunction::OMPPrivateScope Scope(CGF); 5446 const auto *IPriv = Privates.begin(); 5447 unsigned Idx = 0; 5448 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5449 const auto *RHSVar = 5450 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5451 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 5452 const auto *LHSVar = 5453 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5454 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 5455 QualType PrivTy = (*IPriv)->getType(); 5456 if (PrivTy->isVariablyModifiedType()) { 5457 // Get array size and emit VLA type. 
++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
///
/// If \p PrivateRef has array type, the variables referenced by \p LHS and
/// \p RHS are handed to EmitOMPAggregateReduction together with a callback
/// that emits the combiner; otherwise the combiner is emitted directly.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the reduction items described by
/// \p Privates, \p LHSExprs, \p RHSExprs and \p ReductionOps (parallel
/// arrays, one entry per reduction item).
///
/// Nothing is emitted when \p CGF has no insertion point. When
/// Options.SimpleReduction is set, only the combiners are emitted inline.
/// Otherwise a list of reduction items is built, __kmpc_reduce or
/// __kmpc_reduce_nowait (selected by Options.WithNowait) is called, and a
/// switch on its result is emitted, as sketched in the comment at the top of
/// the body.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime call is emitted on this path: the combiners are emitted
    // inline inside a cleanups scope.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever an extra slot is
  // used to store the size of a variably modified item.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The element count is stored in the pointer-sized slot via inttoptr.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note: <n> is the number of reduction items (RHSExprs.size()), not the
  // number of RedList slots, which may be larger because of stored VLA sizes.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only an exit callee is supplied to the action (the enter callee is
  // nullptr): the matching __kmpc_end_reduce{_nowait} call.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are the two sides of a top-level assignment 'x = upd';
      // EExpr is the right operand of the binary operator found in 'upd'.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: inside this 'if' the name BO refers to the BinaryOperator node
      // and shadows the opcode variable declared just above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback: store the value of X passed in into a temporary,
              // remap VD to that temporary and evaluate the update expression
              // against it.
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // When getBaseDecl yields no declaration, Ref itself is expected to be a
  // DeclRefExpr naming a variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name; anything else uses
  // the mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS and RHS must be DeclRefExprs naming variables (enforced by the casts).
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \return The emitted function, or nullptr when reduction item \p N needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Builds an array of kmp_taskred_input_t records, one per entry of
/// Data.ReductionVars, and passes it to the runtime:
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set,
/// __kmpc_taskred_init otherwise.
///
/// \return The result of the runtime call, or nullptr when \p CGF has no
/// insertion point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // (null when the item needs no finalizer function)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the call to __kmpc_task_reduction_modifier_fini; the result of the
/// runtime call is discarded.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// For reduction item \p N with a non-constant size, stores that size into
/// the artificial threadprivate variable whose name is produced by
/// generateUniqueName with the "reduction_size" prefix. Items with a constant
/// size need no fixup.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Calls __kmpc_task_reduction_get_th_data for the shared item \p SharedLVal
/// and wraps the returned pointer in an Address with element type i8 and the
/// alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

/// Emits a taskwait.
///
/// With the OpenMPIRBuilder language option and no dependences, the call is
/// created through OMPBuilder.createTaskwait. Otherwise __kmpc_omp_wait_deps
/// is called when Data.Dependences is non-empty and __kmpc_omp_taskwait when
/// it is empty. In every case the untied switch is emitted afterwards if the
/// current captured-statement info is an OpenMP region.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Note: emitDependClause is invoked even when Data.Dependences is empty;
    // its results are only consumed in the non-empty branch below.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependences are passed: count 0 and a null list.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen for a directive of kind
/// \p InnerKind inside an InlinedOpenMPRegionRAII scope. Does nothing when
/// \p CGF has no insertion point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): the last RAII argument is false for critical, master and
  // masked; its meaning is defined by InlinedOpenMPRegionRAII, which is not
  // visible in this part of the file.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of the cancelled region to an RTCancelKind:
/// parallel, for and sections map to their counterparts; any other kind is
/// asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a __kmpc_cancellationpoint call followed by a conditional branch out
/// of the construct. Nothing is emitted without an insertion point, outside
/// an OpenMP region, or when the region has no cancel and \p CancelRegion is
/// not taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the .cancel.exit
      // path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a __kmpc_cancel call followed by a conditional branch out of the
/// construct. When \p IfCond is given, the call is guarded by it through
/// emitIfClause with an empty else part. Nothing is emitted without an
/// insertion point or outside an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the .cancel.exit
      // path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
///
/// Enter() emits emitUsesAllocatorsInit for every (allocator, traits) pair and
/// Exit() emits emitUsesAllocatorsFini for every allocator; both do nothing
/// without an insertion point. The pairs are held through a non-owning
/// ArrayRef, so the referenced storage must outlive this object.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Collects the (allocator, traits) pairs of all uses_allocators clauses on
/// \p D that specify traits, installs an OMPUsesAllocatorsActionTy for them
/// on \p CodeGen, and delegates to emitTargetOutlinedFunctionHelper.
/// Also sets HasEmittedTargetRegion.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // Note: this local D shadows the directive parameter D inside the loop.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits are skipped.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId =
getThreadID(CGF, Allocator->getExprLoc()); 6443 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6444 // Use default memspace handle. 6445 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6446 llvm::Value *NumTraits = llvm::ConstantInt::get( 6447 CGF.IntTy, cast<ConstantArrayType>( 6448 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6449 ->getSize() 6450 .getLimitedValue()); 6451 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6452 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6453 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6454 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6455 AllocatorTraitsLVal.getBaseInfo(), 6456 AllocatorTraitsLVal.getTBAAInfo()); 6457 llvm::Value *Traits = 6458 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6459 6460 llvm::Value *AllocatorVal = 6461 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6462 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6463 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6464 // Store to allocator. 
6465 CGF.EmitVarDecl(*cast<VarDecl>( 6466 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6467 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6468 AllocatorVal = 6469 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6470 Allocator->getType(), Allocator->getExprLoc()); 6471 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6472 } 6473 6474 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6475 const Expr *Allocator) { 6476 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6477 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6478 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6479 llvm::Value *AllocatorVal = 6480 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6481 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6482 CGF.getContext().VoidPtrTy, 6483 Allocator->getExprLoc()); 6484 (void)CGF.EmitRuntimeCall( 6485 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6486 OMPRTL___kmpc_destroy_allocator), 6487 {ThreadId, AllocatorVal}); 6488 } 6489 6490 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6491 const OMPExecutableDirective &D, StringRef ParentName, 6492 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6493 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6494 // Create a unique name for the entry function using the source location 6495 // information of the current target region. The name will be something like: 6496 // 6497 // __omp_offloading_DD_FFFF_PP_lBB 6498 // 6499 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6500 // mangled name of the function that encloses the target region and BB is the 6501 // line number of the target region. 
6502 6503 const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || 6504 !CGM.getLangOpts().OpenMPOffloadMandatory; 6505 unsigned DeviceID; 6506 unsigned FileID; 6507 unsigned Line; 6508 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6509 Line); 6510 SmallString<64> EntryFnName; 6511 { 6512 llvm::raw_svector_ostream OS(EntryFnName); 6513 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6514 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6515 } 6516 6517 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6518 6519 CodeGenFunction CGF(CGM, true); 6520 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6521 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6522 6523 if (BuildOutlinedFn) 6524 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6525 6526 // If this target outline function is not an offload entry, we don't need to 6527 // register it. 6528 if (!IsOffloadEntry) 6529 return; 6530 6531 // The target region ID is used by the runtime library to identify the current 6532 // target region, so it only has to be unique and not necessarily point to 6533 // anything. It could be the pointer to the outlined function that implements 6534 // the target region, but we aren't using that so that the compiler doesn't 6535 // need to keep that, and could therefore inline the host function if proven 6536 // worthwhile during optimization. In the other hand, if emitting code for the 6537 // device, the ID has to be the function address so that it can retrieved from 6538 // the offloading entry and launched by the runtime library. We also mark the 6539 // outlined function to have external linkage in case we are emitting code for 6540 // the device, because these functions will be entry points to the device. 
6541 6542 if (CGM.getLangOpts().OpenMPIsDevice) { 6543 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6544 OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage); 6545 OutlinedFn->setDSOLocal(false); 6546 if (CGM.getTriple().isAMDGCN()) 6547 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6548 } else { 6549 std::string Name = getName({EntryFnName, "region_id"}); 6550 OutlinedFnID = new llvm::GlobalVariable( 6551 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6552 llvm::GlobalValue::WeakAnyLinkage, 6553 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6554 } 6555 6556 // If we do not allow host fallback we still need a named address to use. 6557 llvm::Constant *TargetRegionEntryAddr = OutlinedFn; 6558 if (!BuildOutlinedFn) { 6559 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && 6560 "Named kernel already exists?"); 6561 TargetRegionEntryAddr = new llvm::GlobalVariable( 6562 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6563 llvm::GlobalValue::InternalLinkage, 6564 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); 6565 } 6566 6567 // Register the information for the entry associated with this target region. 
6568 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6569 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6570 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6571 6572 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6573 int32_t DefaultValTeams = -1; 6574 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6575 if (DefaultValTeams > 0 && OutlinedFn) { 6576 OutlinedFn->addFnAttr("omp_target_num_teams", 6577 std::to_string(DefaultValTeams)); 6578 } 6579 int32_t DefaultValThreads = -1; 6580 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6581 if (DefaultValThreads > 0 && OutlinedFn) { 6582 OutlinedFn->addFnAttr("omp_target_thread_limit", 6583 std::to_string(DefaultValThreads)); 6584 } 6585 6586 if (BuildOutlinedFn) 6587 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6588 } 6589 6590 /// Checks if the expression is constant or does not have non-trivial function 6591 /// calls. 6592 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6593 // We can skip constant expressions. 6594 // We can skip expressions with trivial calls or simple expressions. 6595 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6596 !E->hasNonTrivialCall(Ctx)) && 6597 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6598 } 6599 6600 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6601 const Stmt *Body) { 6602 const Stmt *Child = Body->IgnoreContainers(); 6603 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6604 Child = nullptr; 6605 for (const Stmt *S : C->body()) { 6606 if (const auto *E = dyn_cast<Expr>(S)) { 6607 if (isTrivial(Ctx, E)) 6608 continue; 6609 } 6610 // Some of the statements can be ignored. 6611 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6612 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6613 continue; 6614 // Analyze declarations. 
6615 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6616 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6617 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6618 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6619 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6620 isa<UsingDirectiveDecl>(D) || 6621 isa<OMPDeclareReductionDecl>(D) || 6622 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6623 return true; 6624 const auto *VD = dyn_cast<VarDecl>(D); 6625 if (!VD) 6626 return false; 6627 return VD->hasGlobalStorage() || !VD->isUsed(); 6628 })) 6629 continue; 6630 } 6631 // Found multiple children - cannot get the one child only. 6632 if (Child) 6633 return nullptr; 6634 Child = S; 6635 } 6636 if (Child) 6637 Child = Child->IgnoreContainers(); 6638 } 6639 return Child; 6640 } 6641 6642 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6643 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6644 int32_t &DefaultVal) { 6645 6646 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6647 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6648 "Expected target-based executable directive."); 6649 switch (DirectiveKind) { 6650 case OMPD_target: { 6651 const auto *CS = D.getInnermostCapturedStmt(); 6652 const auto *Body = 6653 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6654 const Stmt *ChildStmt = 6655 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6656 if (const auto *NestedDir = 6657 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6658 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6659 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6660 const Expr *NumTeams = 6661 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6662 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6663 if (auto Constant = 6664 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6665 DefaultVal = Constant->getExtValue(); 6666 return NumTeams; 6667 } 6668 DefaultVal 
= 0; 6669 return nullptr; 6670 } 6671 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6672 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6673 DefaultVal = 1; 6674 return nullptr; 6675 } 6676 DefaultVal = 1; 6677 return nullptr; 6678 } 6679 // A value of -1 is used to check if we need to emit no teams region 6680 DefaultVal = -1; 6681 return nullptr; 6682 } 6683 case OMPD_target_teams: 6684 case OMPD_target_teams_distribute: 6685 case OMPD_target_teams_distribute_simd: 6686 case OMPD_target_teams_distribute_parallel_for: 6687 case OMPD_target_teams_distribute_parallel_for_simd: { 6688 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6689 const Expr *NumTeams = 6690 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6691 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6692 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6693 DefaultVal = Constant->getExtValue(); 6694 return NumTeams; 6695 } 6696 DefaultVal = 0; 6697 return nullptr; 6698 } 6699 case OMPD_target_parallel: 6700 case OMPD_target_parallel_for: 6701 case OMPD_target_parallel_for_simd: 6702 case OMPD_target_simd: 6703 DefaultVal = 1; 6704 return nullptr; 6705 case OMPD_parallel: 6706 case OMPD_for: 6707 case OMPD_parallel_for: 6708 case OMPD_parallel_master: 6709 case OMPD_parallel_sections: 6710 case OMPD_for_simd: 6711 case OMPD_parallel_for_simd: 6712 case OMPD_cancel: 6713 case OMPD_cancellation_point: 6714 case OMPD_ordered: 6715 case OMPD_threadprivate: 6716 case OMPD_allocate: 6717 case OMPD_task: 6718 case OMPD_simd: 6719 case OMPD_tile: 6720 case OMPD_unroll: 6721 case OMPD_sections: 6722 case OMPD_section: 6723 case OMPD_single: 6724 case OMPD_master: 6725 case OMPD_critical: 6726 case OMPD_taskyield: 6727 case OMPD_barrier: 6728 case OMPD_taskwait: 6729 case OMPD_taskgroup: 6730 case OMPD_atomic: 6731 case OMPD_flush: 6732 case OMPD_depobj: 6733 case OMPD_scan: 6734 case OMPD_teams: 6735 case OMPD_target_data: 6736 case 
OMPD_target_exit_data: 6737 case OMPD_target_enter_data: 6738 case OMPD_distribute: 6739 case OMPD_distribute_simd: 6740 case OMPD_distribute_parallel_for: 6741 case OMPD_distribute_parallel_for_simd: 6742 case OMPD_teams_distribute: 6743 case OMPD_teams_distribute_simd: 6744 case OMPD_teams_distribute_parallel_for: 6745 case OMPD_teams_distribute_parallel_for_simd: 6746 case OMPD_target_update: 6747 case OMPD_declare_simd: 6748 case OMPD_declare_variant: 6749 case OMPD_begin_declare_variant: 6750 case OMPD_end_declare_variant: 6751 case OMPD_declare_target: 6752 case OMPD_end_declare_target: 6753 case OMPD_declare_reduction: 6754 case OMPD_declare_mapper: 6755 case OMPD_taskloop: 6756 case OMPD_taskloop_simd: 6757 case OMPD_master_taskloop: 6758 case OMPD_master_taskloop_simd: 6759 case OMPD_parallel_master_taskloop: 6760 case OMPD_parallel_master_taskloop_simd: 6761 case OMPD_requires: 6762 case OMPD_metadirective: 6763 case OMPD_unknown: 6764 break; 6765 default: 6766 break; 6767 } 6768 llvm_unreachable("Unexpected directive kind."); 6769 } 6770 6771 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6772 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6773 assert(!CGF.getLangOpts().OpenMPIsDevice && 6774 "Clauses associated with the teams directive expected to be emitted " 6775 "only for the host!"); 6776 CGBuilderTy &Bld = CGF.Builder; 6777 int32_t DefaultNT = -1; 6778 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6779 if (NumTeams != nullptr) { 6780 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6781 6782 switch (DirectiveKind) { 6783 case OMPD_target: { 6784 const auto *CS = D.getInnermostCapturedStmt(); 6785 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6786 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6787 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6788 /*IgnoreResultAssign*/ true); 6789 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6790 /*isSigned=*/true); 6791 } 
6792 case OMPD_target_teams: 6793 case OMPD_target_teams_distribute: 6794 case OMPD_target_teams_distribute_simd: 6795 case OMPD_target_teams_distribute_parallel_for: 6796 case OMPD_target_teams_distribute_parallel_for_simd: { 6797 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6798 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6799 /*IgnoreResultAssign*/ true); 6800 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6801 /*isSigned=*/true); 6802 } 6803 default: 6804 break; 6805 } 6806 } else if (DefaultNT == -1) { 6807 return nullptr; 6808 } 6809 6810 return Bld.getInt32(DefaultNT); 6811 } 6812 6813 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6814 llvm::Value *DefaultThreadLimitVal) { 6815 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6816 CGF.getContext(), CS->getCapturedStmt()); 6817 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6818 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6819 llvm::Value *NumThreads = nullptr; 6820 llvm::Value *CondVal = nullptr; 6821 // Handle if clause. If if clause present, the number of threads is 6822 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
6823 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6824 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6825 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6826 const OMPIfClause *IfClause = nullptr; 6827 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6828 if (C->getNameModifier() == OMPD_unknown || 6829 C->getNameModifier() == OMPD_parallel) { 6830 IfClause = C; 6831 break; 6832 } 6833 } 6834 if (IfClause) { 6835 const Expr *Cond = IfClause->getCondition(); 6836 bool Result; 6837 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6838 if (!Result) 6839 return CGF.Builder.getInt32(1); 6840 } else { 6841 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6842 if (const auto *PreInit = 6843 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6844 for (const auto *I : PreInit->decls()) { 6845 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6846 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6847 } else { 6848 CodeGenFunction::AutoVarEmission Emission = 6849 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6850 CGF.EmitAutoVarCleanups(Emission); 6851 } 6852 } 6853 } 6854 CondVal = CGF.EvaluateExprAsBool(Cond); 6855 } 6856 } 6857 } 6858 // Check the value of num_threads clause iff if clause was not specified 6859 // or is not evaluated to false. 
6860 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6861 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6862 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6863 const auto *NumThreadsClause = 6864 Dir->getSingleClause<OMPNumThreadsClause>(); 6865 CodeGenFunction::LexicalScope Scope( 6866 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6867 if (const auto *PreInit = 6868 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6869 for (const auto *I : PreInit->decls()) { 6870 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6871 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6872 } else { 6873 CodeGenFunction::AutoVarEmission Emission = 6874 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6875 CGF.EmitAutoVarCleanups(Emission); 6876 } 6877 } 6878 } 6879 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6880 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6881 /*isSigned=*/false); 6882 if (DefaultThreadLimitVal) 6883 NumThreads = CGF.Builder.CreateSelect( 6884 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6885 DefaultThreadLimitVal, NumThreads); 6886 } else { 6887 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6888 : CGF.Builder.getInt32(0); 6889 } 6890 // Process condition of the if clause. 6891 if (CondVal) { 6892 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6893 CGF.Builder.getInt32(1)); 6894 } 6895 return NumThreads; 6896 } 6897 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6898 return CGF.Builder.getInt32(1); 6899 return DefaultThreadLimitVal; 6900 } 6901 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6902 : CGF.Builder.getInt32(0); 6903 } 6904 6905 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6906 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6907 int32_t &DefaultVal) { 6908 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6909 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6910 "Expected target-based executable directive."); 6911 6912 switch (DirectiveKind) { 6913 case OMPD_target: 6914 // Teams have no clause thread_limit 6915 return nullptr; 6916 case OMPD_target_teams: 6917 case OMPD_target_teams_distribute: 6918 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6919 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6920 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6921 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6922 if (auto Constant = 6923 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6924 DefaultVal = Constant->getExtValue(); 6925 return ThreadLimit; 6926 } 6927 return nullptr; 6928 case OMPD_target_parallel: 6929 case OMPD_target_parallel_for: 6930 case OMPD_target_parallel_for_simd: 6931 case OMPD_target_teams_distribute_parallel_for: 6932 case OMPD_target_teams_distribute_parallel_for_simd: { 6933 Expr *ThreadLimit = nullptr; 6934 Expr *NumThreads = nullptr; 6935 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6936 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6937 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6938 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6939 if (auto Constant = 6940 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6941 DefaultVal = Constant->getExtValue(); 6942 } 6943 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6944 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6945 NumThreads = NumThreadsClause->getNumThreads(); 6946 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6947 if (auto Constant = 6948 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6949 if (Constant->getExtValue() < DefaultVal) { 6950 DefaultVal = Constant->getExtValue(); 6951 ThreadLimit = NumThreads; 6952 } 6953 } 6954 } 6955 } 6956 return ThreadLimit; 6957 } 6958 case OMPD_target_teams_distribute_simd: 6959 case OMPD_target_simd: 6960 DefaultVal = 1; 6961 return nullptr; 6962 case OMPD_parallel: 6963 case OMPD_for: 6964 case OMPD_parallel_for: 6965 case OMPD_parallel_master: 6966 case OMPD_parallel_sections: 6967 case OMPD_for_simd: 6968 case OMPD_parallel_for_simd: 6969 case OMPD_cancel: 6970 case OMPD_cancellation_point: 6971 case OMPD_ordered: 6972 case OMPD_threadprivate: 6973 case OMPD_allocate: 6974 case OMPD_task: 6975 case OMPD_simd: 6976 case OMPD_tile: 6977 case OMPD_unroll: 6978 case OMPD_sections: 6979 case OMPD_section: 6980 case OMPD_single: 6981 case OMPD_master: 6982 case OMPD_critical: 6983 case OMPD_taskyield: 6984 case OMPD_barrier: 6985 case OMPD_taskwait: 6986 case OMPD_taskgroup: 6987 case OMPD_atomic: 6988 case OMPD_flush: 6989 case OMPD_depobj: 6990 case OMPD_scan: 6991 case OMPD_teams: 6992 case OMPD_target_data: 6993 case OMPD_target_exit_data: 6994 case OMPD_target_enter_data: 6995 case OMPD_distribute: 6996 case OMPD_distribute_simd: 6997 case OMPD_distribute_parallel_for: 6998 case OMPD_distribute_parallel_for_simd: 6999 case OMPD_teams_distribute: 7000 case OMPD_teams_distribute_simd: 7001 case OMPD_teams_distribute_parallel_for: 7002 case OMPD_teams_distribute_parallel_for_simd: 7003 case OMPD_target_update: 7004 case OMPD_declare_simd: 7005 case OMPD_declare_variant: 7006 case OMPD_begin_declare_variant: 7007 case OMPD_end_declare_variant: 7008 case OMPD_declare_target: 7009 case OMPD_end_declare_target: 7010 case OMPD_declare_reduction: 7011 case OMPD_declare_mapper: 7012 case OMPD_taskloop: 7013 case OMPD_taskloop_simd: 7014 case OMPD_master_taskloop: 7015 case OMPD_master_taskloop_simd: 7016 case OMPD_parallel_master_taskloop: 7017 case 
OMPD_parallel_master_taskloop_simd: 7018 case OMPD_requires: 7019 case OMPD_unknown: 7020 break; 7021 default: 7022 break; 7023 } 7024 llvm_unreachable("Unsupported directive kind."); 7025 } 7026 7027 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7028 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7029 assert(!CGF.getLangOpts().OpenMPIsDevice && 7030 "Clauses associated with the teams directive expected to be emitted " 7031 "only for the host!"); 7032 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7033 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7034 "Expected target-based executable directive."); 7035 CGBuilderTy &Bld = CGF.Builder; 7036 llvm::Value *ThreadLimitVal = nullptr; 7037 llvm::Value *NumThreadsVal = nullptr; 7038 switch (DirectiveKind) { 7039 case OMPD_target: { 7040 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7041 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7042 return NumThreads; 7043 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7044 CGF.getContext(), CS->getCapturedStmt()); 7045 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7046 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7047 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7048 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7049 const auto *ThreadLimitClause = 7050 Dir->getSingleClause<OMPThreadLimitClause>(); 7051 CodeGenFunction::LexicalScope Scope( 7052 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7053 if (const auto *PreInit = 7054 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7055 for (const auto *I : PreInit->decls()) { 7056 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7057 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7058 } else { 7059 CodeGenFunction::AutoVarEmission Emission = 7060 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7061 CGF.EmitAutoVarCleanups(Emission); 7062 } 7063 } 7064 } 7065 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7066 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7067 ThreadLimitVal = 7068 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7069 } 7070 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7071 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7072 CS = Dir->getInnermostCapturedStmt(); 7073 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7074 CGF.getContext(), CS->getCapturedStmt()); 7075 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7076 } 7077 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7078 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7079 CS = Dir->getInnermostCapturedStmt(); 7080 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7081 return NumThreads; 7082 } 7083 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7084 return Bld.getInt32(1); 7085 } 7086 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7087 } 7088 case OMPD_target_teams: { 7089 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7090 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7091 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7092 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7093 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7094 ThreadLimitVal = 7095 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7096 } 7097 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7098 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7099 return NumThreads; 7100 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7101 CGF.getContext(), CS->getCapturedStmt()); 7102 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7103 if (Dir->getDirectiveKind() == OMPD_distribute) { 7104 CS = Dir->getInnermostCapturedStmt(); 7105 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7106 return NumThreads; 7107 } 7108 } 7109 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7110 } 7111 case OMPD_target_teams_distribute: 7112 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7113 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7114 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7115 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7116 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7117 ThreadLimitVal = 7118 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7119 } 7120 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7121 case OMPD_target_parallel: 7122 case OMPD_target_parallel_for: 7123 case OMPD_target_parallel_for_simd: 7124 case OMPD_target_teams_distribute_parallel_for: 7125 case OMPD_target_teams_distribute_parallel_for_simd: { 7126 llvm::Value *CondVal = nullptr; 7127 // Handle if clause. If if clause present, the number of threads is 7128 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7129 if (D.hasClausesOfKind<OMPIfClause>()) { 7130 const OMPIfClause *IfClause = nullptr; 7131 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7132 if (C->getNameModifier() == OMPD_unknown || 7133 C->getNameModifier() == OMPD_parallel) { 7134 IfClause = C; 7135 break; 7136 } 7137 } 7138 if (IfClause) { 7139 const Expr *Cond = IfClause->getCondition(); 7140 bool Result; 7141 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7142 if (!Result) 7143 return Bld.getInt32(1); 7144 } else { 7145 CodeGenFunction::RunCleanupsScope Scope(CGF); 7146 CondVal = CGF.EvaluateExprAsBool(Cond); 7147 } 7148 } 7149 } 7150 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7151 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7152 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7153 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7154 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7155 ThreadLimitVal = 7156 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7157 } 7158 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7159 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7160 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7161 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7162 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7163 NumThreadsVal = 7164 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7165 ThreadLimitVal = ThreadLimitVal 7166 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7167 ThreadLimitVal), 7168 NumThreadsVal, ThreadLimitVal) 7169 : NumThreadsVal; 7170 } 7171 if (!ThreadLimitVal) 7172 ThreadLimitVal = Bld.getInt32(0); 7173 if (CondVal) 7174 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7175 return ThreadLimitVal; 7176 } 7177 case OMPD_target_teams_distribute_simd: 7178 case OMPD_target_simd: 7179 return Bld.getInt32(1); 7180 case OMPD_parallel: 7181 case OMPD_for: 7182 case OMPD_parallel_for: 7183 case OMPD_parallel_master: 7184 case OMPD_parallel_sections: 7185 case OMPD_for_simd: 7186 case OMPD_parallel_for_simd: 7187 case OMPD_cancel: 7188 case OMPD_cancellation_point: 7189 case OMPD_ordered: 7190 case OMPD_threadprivate: 7191 case OMPD_allocate: 7192 case OMPD_task: 7193 case OMPD_simd: 7194 case OMPD_tile: 7195 case OMPD_unroll: 7196 case OMPD_sections: 7197 case OMPD_section: 7198 case OMPD_single: 7199 case OMPD_master: 7200 case OMPD_critical: 7201 case OMPD_taskyield: 7202 case OMPD_barrier: 7203 case OMPD_taskwait: 7204 case OMPD_taskgroup: 7205 case OMPD_atomic: 7206 case OMPD_flush: 7207 case OMPD_depobj: 7208 case OMPD_scan: 7209 case OMPD_teams: 7210 case OMPD_target_data: 7211 case OMPD_target_exit_data: 7212 case OMPD_target_enter_data: 7213 case OMPD_distribute: 7214 case OMPD_distribute_simd: 7215 case OMPD_distribute_parallel_for: 7216 case OMPD_distribute_parallel_for_simd: 7217 case OMPD_teams_distribute: 7218 case OMPD_teams_distribute_simd: 7219 case OMPD_teams_distribute_parallel_for: 7220 case OMPD_teams_distribute_parallel_for_simd: 7221 case OMPD_target_update: 7222 case OMPD_declare_simd: 7223 case OMPD_declare_variant: 7224 case OMPD_begin_declare_variant: 7225 case OMPD_end_declare_variant: 7226 case OMPD_declare_target: 7227 case OMPD_end_declare_target: 7228 case OMPD_declare_reduction: 7229 case OMPD_declare_mapper: 7230 case OMPD_taskloop: 7231 case OMPD_taskloop_simd: 7232 case OMPD_master_taskloop: 7233 case 
OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    // The directive kinds in this run of labels only leave the switch; control
    // then reaches the llvm_unreachable below.
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region. Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured. It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  ///
  /// The offset is the number of zero bits below the lowest set bit of
  /// OMP_MAP_MEMBER_OF, found by shifting the mask right until bit 0 is set.
  /// For the mask value declared above (0xffff000000000000) that is 48.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    // The mask is non-zero, so this loop terminates once its lowest set bit
    // has been shifted down to bit 0.
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
7322 class MappingExprInfo { 7323 /// The variable declaration used for the data mapping. 7324 const ValueDecl *MapDecl = nullptr; 7325 /// The original expression used in the map clause, or null if there is 7326 /// none. 7327 const Expr *MapExpr = nullptr; 7328 7329 public: 7330 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7331 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7332 7333 const ValueDecl *getMapDecl() const { return MapDecl; } 7334 const Expr *getMapExpr() const { return MapExpr; } 7335 }; 7336 7337 /// Class that associates information with a base pointer to be passed to the 7338 /// runtime library. 7339 class BasePointerInfo { 7340 /// The base pointer. 7341 llvm::Value *Ptr = nullptr; 7342 /// The base declaration that refers to this device pointer, or null if 7343 /// there is none. 7344 const ValueDecl *DevPtrDecl = nullptr; 7345 7346 public: 7347 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7348 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7349 llvm::Value *operator*() const { return Ptr; } 7350 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7351 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7352 }; 7353 7354 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7355 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7356 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7357 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7358 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7359 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7360 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7361 7362 /// This structure contains combined information generated for mappable 7363 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7364 /// mappers, and non-contiguous information. 
7365 struct MapCombinedInfoTy { 7366 struct StructNonContiguousInfo { 7367 bool IsNonContiguous = false; 7368 MapDimArrayTy Dims; 7369 MapNonContiguousArrayTy Offsets; 7370 MapNonContiguousArrayTy Counts; 7371 MapNonContiguousArrayTy Strides; 7372 }; 7373 MapExprsArrayTy Exprs; 7374 MapBaseValuesArrayTy BasePointers; 7375 MapValuesArrayTy Pointers; 7376 MapValuesArrayTy Sizes; 7377 MapFlagsArrayTy Types; 7378 MapMappersArrayTy Mappers; 7379 StructNonContiguousInfo NonContigInfo; 7380 7381 /// Append arrays in \a CurInfo. 7382 void append(MapCombinedInfoTy &CurInfo) { 7383 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7384 BasePointers.append(CurInfo.BasePointers.begin(), 7385 CurInfo.BasePointers.end()); 7386 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7387 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7388 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7389 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7390 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7391 CurInfo.NonContigInfo.Dims.end()); 7392 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7393 CurInfo.NonContigInfo.Offsets.end()); 7394 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7395 CurInfo.NonContigInfo.Counts.end()); 7396 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7397 CurInfo.NonContigInfo.Strides.end()); 7398 } 7399 }; 7400 7401 /// Map between a struct and the its lowest & highest elements which have been 7402 /// mapped. 
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
///                    HE(FieldIndex, Pointer)}
struct StructRangeInfoTy {
  MapCombinedInfoTy PreliminaryMapData;
  // (field index, address) of the lowest mapped element; defaults to index 0
  // with an invalid address.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
      0, Address::invalid()};
  // (field index, address) of the highest mapped element; same defaults.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
      0, Address::invalid()};
  // Both addresses start out invalid.
  Address Base = Address::invalid();
  Address LB = Address::invalid();
  bool IsArraySection = false;
  bool HasCompleteRecord = false;
};

private:
/// Information recorded for one mappable-expression component list: the
/// components themselves, the map type with its map/motion modifiers, and the
/// accompanying flags, optional user-defined mapper and optional expression.
struct MapInfo {
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  // NOTE(review): the modifier lists are ArrayRefs, i.e. non-owning views;
  // the referenced storage presumably has to outlive this object -- confirm
  // at the construction sites.
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  bool ReturnDevicePointer = false;
  bool IsImplicit = false;
  const ValueDecl *Mapper = nullptr;
  const Expr *VarRef = nullptr;
  bool ForDeviceAddr = false;

  /// Default state: unknown map type, empty modifier lists, all flags false
  /// and no mapper or expression.
  MapInfo() = default;
  /// Member-wise constructor; the trailing \p Mapper, \p VarRef and
  /// \p ForDeviceAddr arguments are optional.
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
7448 struct DeferredDevicePtrEntryTy { 7449 const Expr *IE = nullptr; 7450 const ValueDecl *VD = nullptr; 7451 bool ForDeviceAddr = false; 7452 7453 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7454 bool ForDeviceAddr) 7455 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7456 }; 7457 7458 /// The target directive from where the mappable clauses were extracted. It 7459 /// is either a executable directive or a user-defined mapper directive. 7460 llvm::PointerUnion<const OMPExecutableDirective *, 7461 const OMPDeclareMapperDecl *> 7462 CurDir; 7463 7464 /// Function the directive is being generated for. 7465 CodeGenFunction &CGF; 7466 7467 /// Set of all first private variables in the current directive. 7468 /// bool data is set to true if the variable is implicitly marked as 7469 /// firstprivate, false otherwise. 7470 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7471 7472 /// Map between device pointer declarations and their expression components. 7473 /// The key value for declarations in 'this' is null. 7474 llvm::DenseMap< 7475 const ValueDecl *, 7476 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7477 DevPointersMap; 7478 7479 /// Map between lambda declarations and their map type. 7480 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7481 7482 llvm::Value *getExprTypeSize(const Expr *E) const { 7483 QualType ExprTy = E->getType().getCanonicalType(); 7484 7485 // Calculate the size for array shaping expression. 
7486 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7487 llvm::Value *Size = 7488 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7489 for (const Expr *SE : OAE->getDimensions()) { 7490 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7491 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7492 CGF.getContext().getSizeType(), 7493 SE->getExprLoc()); 7494 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7495 } 7496 return Size; 7497 } 7498 7499 // Reference types are ignored for mapping purposes. 7500 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7501 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7502 7503 // Given that an array section is considered a built-in type, we need to 7504 // do the calculation based on the length of the section instead of relying 7505 // on CGF.getTypeSize(E->getType()). 7506 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7507 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7508 OAE->getBase()->IgnoreParenImpCasts()) 7509 .getCanonicalType(); 7510 7511 // If there is no length associated with the expression and lower bound is 7512 // not specified too, that means we are using the whole length of the 7513 // base. 7514 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7515 !OAE->getLowerBound()) 7516 return CGF.getTypeSize(BaseTy); 7517 7518 llvm::Value *ElemSize; 7519 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7520 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7521 } else { 7522 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7523 assert(ATy && "Expecting array type if not a pointer type."); 7524 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7525 } 7526 7527 // If we don't have a length at this point, that is because we have an 7528 // array section with a single element. 
7529 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7530 return ElemSize; 7531 7532 if (const Expr *LenExpr = OAE->getLength()) { 7533 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7534 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7535 CGF.getContext().getSizeType(), 7536 LenExpr->getExprLoc()); 7537 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7538 } 7539 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7540 OAE->getLowerBound() && "expected array_section[lb:]."); 7541 // Size = sizetype - lb * elemtype; 7542 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7543 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7544 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7545 CGF.getContext().getSizeType(), 7546 OAE->getLowerBound()->getExprLoc()); 7547 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7548 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7549 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7550 LengthVal = CGF.Builder.CreateSelect( 7551 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7552 return LengthVal; 7553 } 7554 return CGF.getTypeSize(ExprTy); 7555 } 7556 7557 /// Return the corresponding bits for a given map clause modifier. Add 7558 /// a flag marking the map as a pointer if requested. Add a flag marking the 7559 /// map as the first one of a series of maps that relate to the same map 7560 /// expression. 7561 OpenMPOffloadMappingFlags getMapTypeBits( 7562 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7563 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7564 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7565 OpenMPOffloadMappingFlags Bits = 7566 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7567 switch (MapType) { 7568 case OMPC_MAP_alloc: 7569 case OMPC_MAP_release: 7570 // alloc and release is the default behavior in the runtime library, i.e. 7571 // if we don't pass any bits alloc/release that is what the runtime is 7572 // going to do. Therefore, we don't need to signal anything for these two 7573 // type modifiers. 7574 break; 7575 case OMPC_MAP_to: 7576 Bits |= OMP_MAP_TO; 7577 break; 7578 case OMPC_MAP_from: 7579 Bits |= OMP_MAP_FROM; 7580 break; 7581 case OMPC_MAP_tofrom: 7582 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7583 break; 7584 case OMPC_MAP_delete: 7585 Bits |= OMP_MAP_DELETE; 7586 break; 7587 case OMPC_MAP_unknown: 7588 llvm_unreachable("Unexpected map type!"); 7589 } 7590 if (AddPtrFlag) 7591 Bits |= OMP_MAP_PTR_AND_OBJ; 7592 if (AddIsTargetParamFlag) 7593 Bits |= OMP_MAP_TARGET_PARAM; 7594 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7595 Bits |= OMP_MAP_ALWAYS; 7596 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7597 Bits |= OMP_MAP_CLOSE; 7598 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7599 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7600 Bits |= OMP_MAP_PRESENT; 7601 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7602 Bits |= OMP_MAP_OMPX_HOLD; 7603 if (IsNonContiguous) 7604 Bits |= OMP_MAP_NON_CONTIG; 7605 return Bits; 7606 } 7607 7608 /// Return true if the provided expression is a final array section. A 7609 /// final array section, is one whose length can't be proved to be one. 7610 bool isFinalArraySectionExpression(const Expr *E) const { 7611 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7612 7613 // It is not an array section and therefore not a unity-size one. 7614 if (!OASE) 7615 return false; 7616 7617 // An array section with no colon always refer to a single element. 
7618 if (OASE->getColonLocFirst().isInvalid()) 7619 return false; 7620 7621 const Expr *Length = OASE->getLength(); 7622 7623 // If we don't have a length we have to check if the array has size 1 7624 // for this dimension. Also, we should always expect a length if the 7625 // base type is pointer. 7626 if (!Length) { 7627 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7628 OASE->getBase()->IgnoreParenImpCasts()) 7629 .getCanonicalType(); 7630 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7631 return ATy->getSize().getSExtValue() != 1; 7632 // If we don't have a constant dimension length, we have to consider 7633 // the current section as having any size, so it is not necessarily 7634 // unitary. If it happen to be unity size, that's user fault. 7635 return true; 7636 } 7637 7638 // Check if the length evaluates to 1. 7639 Expr::EvalResult Result; 7640 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7641 return true; // Can have more that size 1. 7642 7643 llvm::APSInt ConstLength = Result.Val.getInt(); 7644 return ConstLength.getSExtValue() != 1; 7645 } 7646 7647 /// Generate the base pointers, section pointers, sizes, map type bits, and 7648 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7649 /// map type, map or motion modifiers, and expression components. 7650 /// \a IsFirstComponent should be set to true if the provided set of 7651 /// components is the first associated with a capture. 
7652 void generateInfoForComponentList( 7653 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7654 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7655 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7656 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7657 bool IsFirstComponentList, bool IsImplicit, 7658 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7659 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7660 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7661 OverlappedElements = llvm::None) const { 7662 // The following summarizes what has to be generated for each map and the 7663 // types below. The generated information is expressed in this order: 7664 // base pointer, section pointer, size, flags 7665 // (to add to the ones that come from the map type and modifier). 7666 // 7667 // double d; 7668 // int i[100]; 7669 // float *p; 7670 // 7671 // struct S1 { 7672 // int i; 7673 // float f[50]; 7674 // } 7675 // struct S2 { 7676 // int i; 7677 // float f[50]; 7678 // S1 s; 7679 // double *p; 7680 // struct S2 *ps; 7681 // int &ref; 7682 // } 7683 // S2 s; 7684 // S2 *ps; 7685 // 7686 // map(d) 7687 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7688 // 7689 // map(i) 7690 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7691 // 7692 // map(i[1:23]) 7693 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7694 // 7695 // map(p) 7696 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7697 // 7698 // map(p[1:24]) 7699 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7700 // in unified shared memory mode or for local pointers 7701 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7702 // 7703 // map(s) 7704 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7705 // 7706 // map(s.i) 7707 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7708 // 7709 // map(s.s.f) 7710 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7711 // 7712 // map(s.p) 7713 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7714 // 7715 // map(to: s.p[:22]) 7716 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7717 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7718 // &(s.p), &(s.p[0]), 22*sizeof(double), 7719 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7720 // (*) alloc space for struct members, only this is a target parameter 7721 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7722 // optimizes this entry out, same in the examples below) 7723 // (***) map the pointee (map: to) 7724 // 7725 // map(to: s.ref) 7726 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7727 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7728 // (*) alloc space for struct members, only this is a target parameter 7729 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7730 // optimizes this entry out, same in the examples below) 7731 // (***) map the pointee (map: to) 7732 // 7733 // map(s.ps) 7734 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7735 // 7736 // map(from: s.ps->s.i) 7737 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7738 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7739 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7740 // 7741 // map(to: s.ps->ps) 7742 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7743 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7744 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7745 // 7746 // map(s.ps->ps->ps) 7747 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7748 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7749 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7750 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7751 // 7752 // map(to: s.ps->ps->s.f[:22]) 7753 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7754 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7755 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7756 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7757 // 7758 // map(ps) 7759 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7760 // 7761 // map(ps->i) 7762 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7763 // 7764 // map(ps->s.f) 7765 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7766 // 7767 // map(from: ps->p) 7768 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7769 // 7770 // map(to: ps->p[:22]) 7771 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7772 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7773 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7774 // 7775 // map(ps->ps) 7776 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7777 // 7778 // map(from: ps->ps->s.i) 7779 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7780 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7781 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7782 // 7783 // map(from: ps->ps->ps) 7784 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7785 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7786 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7787 // 7788 // map(ps->ps->ps->ps) 7789 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7790 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7791 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7792 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7793 // 7794 // map(to: ps->ps->ps->s.f[:22]) 7795 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7796 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7797 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7798 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7799 // 7800 // map(to: s.f[:22]) map(from: s.p[:33]) 7801 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7802 // sizeof(double*) (**), TARGET_PARAM 7803 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7804 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7805 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7806 // (*) allocate contiguous space needed to fit all mapped members even if 7807 // we allocate space for members not mapped (in this example, 7808 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7809 // them as well because they fall between &s.f[0] and &s.p) 7810 // 7811 // map(from: s.f[:22]) map(to: ps->p[:33]) 7812 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7813 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7814 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7815 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7816 // (*) the struct this entry pertains to is the 2nd element in the list of 7817 // arguments, hence MEMBER_OF(2) 7818 // 7819 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7820 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7821 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7822 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7823 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7824 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7825 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7826 // (*) the struct this entry pertains to is the 4th element in the list 7827 // of arguments, hence MEMBER_OF(4) 7828 7829 // Track if the map information being generated is the first for a capture. 7830 bool IsCaptureFirstInfo = IsFirstComponentList; 7831 // When the variable is on a declare target link or in a to clause with 7832 // unified memory, a reference is needed to hold the host/device address 7833 // of the variable. 7834 bool RequiresReference = false; 7835 7836 // Scan the components from the base to the complete expression. 
7837 auto CI = Components.rbegin(); 7838 auto CE = Components.rend(); 7839 auto I = CI; 7840 7841 // Track if the map information being generated is the first for a list of 7842 // components. 7843 bool IsExpressionFirstInfo = true; 7844 bool FirstPointerInComplexData = false; 7845 Address BP = Address::invalid(); 7846 const Expr *AssocExpr = I->getAssociatedExpression(); 7847 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7848 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7849 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7850 7851 if (isa<MemberExpr>(AssocExpr)) { 7852 // The base is the 'this' pointer. The content of the pointer is going 7853 // to be the base of the field being mapped. 7854 BP = CGF.LoadCXXThisAddress(); 7855 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7856 (OASE && 7857 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7858 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7859 } else if (OAShE && 7860 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7861 BP = Address( 7862 CGF.EmitScalarExpr(OAShE->getBase()), 7863 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7864 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7865 } else { 7866 // The base is the reference to the variable. 7867 // BP = &Var. 
7868 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7869 if (const auto *VD = 7870 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7871 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7872 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7873 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7874 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7875 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7876 RequiresReference = true; 7877 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7878 } 7879 } 7880 } 7881 7882 // If the variable is a pointer and is being dereferenced (i.e. is not 7883 // the last component), the base has to be the pointer itself, not its 7884 // reference. References are ignored for mapping purposes. 7885 QualType Ty = 7886 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7887 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7888 // No need to generate individual map information for the pointer, it 7889 // can be associated with the combined storage if shared memory mode is 7890 // active or the base declaration is not global variable. 7891 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7892 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7893 !VD || VD->hasLocalStorage()) 7894 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7895 else 7896 FirstPointerInComplexData = true; 7897 ++I; 7898 } 7899 } 7900 7901 // Track whether a component of the list should be marked as MEMBER_OF some 7902 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7903 // in a component list should be marked as MEMBER_OF, all subsequent entries 7904 // do not belong to the base struct. E.g. 7905 // struct S2 s; 7906 // s.ps->ps->ps->f[:] 7907 // (1) (2) (3) (4) 7908 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7909 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. 
ps(3) 7910 // is the pointee of ps(2) which is not member of struct s, so it should not 7911 // be marked as such (it is still PTR_AND_OBJ). 7912 // The variable is initialized to false so that PTR_AND_OBJ entries which 7913 // are not struct members are not considered (e.g. array of pointers to 7914 // data). 7915 bool ShouldBeMemberOf = false; 7916 7917 // Variable keeping track of whether or not we have encountered a component 7918 // in the component list which is a member expression. Useful when we have a 7919 // pointer or a final array section, in which case it is the previous 7920 // component in the list which tells us whether we have a member expression. 7921 // E.g. X.f[:] 7922 // While processing the final array section "[:]" it is "f" which tells us 7923 // whether we are dealing with a member of a declared struct. 7924 const MemberExpr *EncounteredME = nullptr; 7925 7926 // Track for the total number of dimension. Start from one for the dummy 7927 // dimension. 7928 uint64_t DimSize = 1; 7929 7930 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7931 bool IsPrevMemberReference = false; 7932 7933 for (; I != CE; ++I) { 7934 // If the current component is member of a struct (parent struct) mark it. 7935 if (!EncounteredME) { 7936 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7937 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7938 // as MEMBER_OF the parent struct. 7939 if (EncounteredME) { 7940 ShouldBeMemberOf = true; 7941 // Do not emit as complex pointer if this is actually not array-like 7942 // expression. 
7943 if (FirstPointerInComplexData) { 7944 QualType Ty = std::prev(I) 7945 ->getAssociatedDeclaration() 7946 ->getType() 7947 .getNonReferenceType(); 7948 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7949 FirstPointerInComplexData = false; 7950 } 7951 } 7952 } 7953 7954 auto Next = std::next(I); 7955 7956 // We need to generate the addresses and sizes if this is the last 7957 // component, if the component is a pointer or if it is an array section 7958 // whose length can't be proved to be one. If this is a pointer, it 7959 // becomes the base address for the following components. 7960 7961 // A final array section, is one whose length can't be proved to be one. 7962 // If the map item is non-contiguous then we don't treat any array section 7963 // as final array section. 7964 bool IsFinalArraySection = 7965 !IsNonContiguous && 7966 isFinalArraySectionExpression(I->getAssociatedExpression()); 7967 7968 // If we have a declaration for the mapping use that, otherwise use 7969 // the base declaration of the map clause. 7970 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7971 ? I->getAssociatedDeclaration() 7972 : BaseDecl; 7973 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7974 : MapExpr; 7975 7976 // Get information on whether the element is a pointer. Have to do a 7977 // special treatment for array sections given that they are built-in 7978 // types. 
7979 const auto *OASE = 7980 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7981 const auto *OAShE = 7982 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7983 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7984 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7985 bool IsPointer = 7986 OAShE || 7987 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7988 .getCanonicalType() 7989 ->isAnyPointerType()) || 7990 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7991 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7992 MapDecl && 7993 MapDecl->getType()->isLValueReferenceType(); 7994 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7995 7996 if (OASE) 7997 ++DimSize; 7998 7999 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8000 IsFinalArraySection) { 8001 // If this is not the last component, we expect the pointer to be 8002 // associated with an array expression or member expression. 8003 assert((Next == CE || 8004 isa<MemberExpr>(Next->getAssociatedExpression()) || 8005 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8006 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8007 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8008 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8009 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8010 "Unexpected expression"); 8011 8012 Address LB = Address::invalid(); 8013 Address LowestElem = Address::invalid(); 8014 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8015 const MemberExpr *E) { 8016 const Expr *BaseExpr = E->getBase(); 8017 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8018 // scalar. 
8019 LValue BaseLV; 8020 if (E->isArrow()) { 8021 LValueBaseInfo BaseInfo; 8022 TBAAAccessInfo TBAAInfo; 8023 Address Addr = 8024 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8025 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8026 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8027 } else { 8028 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8029 } 8030 return BaseLV; 8031 }; 8032 if (OAShE) { 8033 LowestElem = LB = 8034 Address(CGF.EmitScalarExpr(OAShE->getBase()), 8035 CGF.ConvertTypeForMem( 8036 OAShE->getBase()->getType()->getPointeeType()), 8037 CGF.getContext().getTypeAlignInChars( 8038 OAShE->getBase()->getType())); 8039 } else if (IsMemberReference) { 8040 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8041 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8042 LowestElem = CGF.EmitLValueForFieldInitialization( 8043 BaseLVal, cast<FieldDecl>(MapDecl)) 8044 .getAddress(CGF); 8045 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8046 .getAddress(CGF); 8047 } else { 8048 LowestElem = LB = 8049 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8050 .getAddress(CGF); 8051 } 8052 8053 // If this component is a pointer inside the base struct then we don't 8054 // need to create any entry for it - it will be combined with the object 8055 // it is pointing to into a single PTR_AND_OBJ entry. 8056 bool IsMemberPointerOrAddr = 8057 EncounteredME && 8058 (((IsPointer || ForDeviceAddr) && 8059 I->getAssociatedExpression() == EncounteredME) || 8060 (IsPrevMemberReference && !IsPointer) || 8061 (IsMemberReference && Next != CE && 8062 !Next->getAssociatedExpression()->getType()->isPointerType())); 8063 if (!OverlappedElements.empty() && Next == CE) { 8064 // Handle base element with the info for overlapped elements. 
8065 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8066 assert(!IsPointer && 8067 "Unexpected base element with the pointer type."); 8068 // Mark the whole struct as the struct that requires allocation on the 8069 // device. 8070 PartialStruct.LowestElem = {0, LowestElem}; 8071 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8072 I->getAssociatedExpression()->getType()); 8073 Address HB = CGF.Builder.CreateConstGEP( 8074 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8075 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8076 TypeSize.getQuantity() - 1); 8077 PartialStruct.HighestElem = { 8078 std::numeric_limits<decltype( 8079 PartialStruct.HighestElem.first)>::max(), 8080 HB}; 8081 PartialStruct.Base = BP; 8082 PartialStruct.LB = LB; 8083 assert( 8084 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8085 "Overlapped elements must be used only once for the variable."); 8086 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8087 // Emit data for non-overlapped data. 8088 OpenMPOffloadMappingFlags Flags = 8089 OMP_MAP_MEMBER_OF | 8090 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8091 /*AddPtrFlag=*/false, 8092 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8093 llvm::Value *Size = nullptr; 8094 // Do bitcopy of all non-overlapped structure elements. 
8095 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8096 Component : OverlappedElements) { 8097 Address ComponentLB = Address::invalid(); 8098 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8099 Component) { 8100 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8101 const auto *FD = dyn_cast<FieldDecl>(VD); 8102 if (FD && FD->getType()->isLValueReferenceType()) { 8103 const auto *ME = 8104 cast<MemberExpr>(MC.getAssociatedExpression()); 8105 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8106 ComponentLB = 8107 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8108 .getAddress(CGF); 8109 } else { 8110 ComponentLB = 8111 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8112 .getAddress(CGF); 8113 } 8114 Size = CGF.Builder.CreatePtrDiff( 8115 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8116 CGF.EmitCastToVoidPtr(LB.getPointer())); 8117 break; 8118 } 8119 } 8120 assert(Size && "Failed to determine structure size"); 8121 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8122 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8123 CombinedInfo.Pointers.push_back(LB.getPointer()); 8124 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8125 Size, CGF.Int64Ty, /*isSigned=*/true)); 8126 CombinedInfo.Types.push_back(Flags); 8127 CombinedInfo.Mappers.push_back(nullptr); 8128 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8129 : 1); 8130 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8131 } 8132 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8133 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8134 CombinedInfo.Pointers.push_back(LB.getPointer()); 8135 Size = CGF.Builder.CreatePtrDiff( 8136 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8137 CGF.EmitCastToVoidPtr(LB.getPointer())); 8138 CombinedInfo.Sizes.push_back( 8139 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8140 CombinedInfo.Types.push_back(Flags); 8141 CombinedInfo.Mappers.push_back(nullptr); 8142 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8143 : 1); 8144 break; 8145 } 8146 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8147 if (!IsMemberPointerOrAddr || 8148 (Next == CE && MapType != OMPC_MAP_unknown)) { 8149 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8150 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8151 CombinedInfo.Pointers.push_back(LB.getPointer()); 8152 CombinedInfo.Sizes.push_back( 8153 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8154 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8155 : 1); 8156 8157 // If Mapper is valid, the last component inherits the mapper. 8158 bool HasMapper = Mapper && Next == CE; 8159 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8160 8161 // We need to add a pointer flag for each map that comes from the 8162 // same expression except for the first one. We also need to signal 8163 // this map is the first one that relates with the current capture 8164 // (there is a set of entries for each capture). 
8165 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8166 MapType, MapModifiers, MotionModifiers, IsImplicit, 8167 !IsExpressionFirstInfo || RequiresReference || 8168 FirstPointerInComplexData || IsMemberReference, 8169 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8170 8171 if (!IsExpressionFirstInfo || IsMemberReference) { 8172 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8173 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8174 if (IsPointer || (IsMemberReference && Next != CE)) 8175 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8176 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8177 8178 if (ShouldBeMemberOf) { 8179 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8180 // should be later updated with the correct value of MEMBER_OF. 8181 Flags |= OMP_MAP_MEMBER_OF; 8182 // From now on, all subsequent PTR_AND_OBJ entries should not be 8183 // marked as MEMBER_OF. 8184 ShouldBeMemberOf = false; 8185 } 8186 } 8187 8188 CombinedInfo.Types.push_back(Flags); 8189 } 8190 8191 // If we have encountered a member expression so far, keep track of the 8192 // mapped member. If the parent is "*this", then the value declaration 8193 // is nullptr. 
8194 if (EncounteredME) { 8195 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8196 unsigned FieldIndex = FD->getFieldIndex(); 8197 8198 // Update info about the lowest and highest elements for this struct 8199 if (!PartialStruct.Base.isValid()) { 8200 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8201 if (IsFinalArraySection) { 8202 Address HB = 8203 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8204 .getAddress(CGF); 8205 PartialStruct.HighestElem = {FieldIndex, HB}; 8206 } else { 8207 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8208 } 8209 PartialStruct.Base = BP; 8210 PartialStruct.LB = BP; 8211 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8212 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8213 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8214 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8215 } 8216 } 8217 8218 // Need to emit combined struct for array sections. 8219 if (IsFinalArraySection || IsNonContiguous) 8220 PartialStruct.IsArraySection = true; 8221 8222 // If we have a final array section, we are done with this expression. 8223 if (IsFinalArraySection) 8224 break; 8225 8226 // The pointer becomes the base for the next element. 8227 if (Next != CE) 8228 BP = IsMemberReference ? LowestElem : LB; 8229 8230 IsExpressionFirstInfo = false; 8231 IsCaptureFirstInfo = false; 8232 FirstPointerInComplexData = false; 8233 IsPrevMemberReference = IsMemberReference; 8234 } else if (FirstPointerInComplexData) { 8235 QualType Ty = Components.rbegin() 8236 ->getAssociatedDeclaration() 8237 ->getType() 8238 .getNonReferenceType(); 8239 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8240 FirstPointerInComplexData = false; 8241 } 8242 } 8243 // If ran into the whole component - allocate the space for the whole 8244 // record. 
8245 if (!EncounteredME) 8246 PartialStruct.HasCompleteRecord = true; 8247 8248 if (!IsNonContiguous) 8249 return; 8250 8251 const ASTContext &Context = CGF.getContext(); 8252 8253 // For supporting stride in array section, we need to initialize the first 8254 // dimension size as 1, first offset as 0, and first count as 1 8255 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8256 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8257 MapValuesArrayTy CurStrides; 8258 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8259 uint64_t ElementTypeSize; 8260 8261 // Collect Size information for each dimension and get the element size as 8262 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8263 // should be [10, 10] and the first stride is 4 btyes. 8264 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8265 Components) { 8266 const Expr *AssocExpr = Component.getAssociatedExpression(); 8267 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8268 8269 if (!OASE) 8270 continue; 8271 8272 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8273 auto *CAT = Context.getAsConstantArrayType(Ty); 8274 auto *VAT = Context.getAsVariableArrayType(Ty); 8275 8276 // We need all the dimension size except for the last dimension. 8277 assert((VAT || CAT || &Component == &*Components.begin()) && 8278 "Should be either ConstantArray or VariableArray if not the " 8279 "first Component"); 8280 8281 // Get element size if CurStrides is empty. 
8282 if (CurStrides.empty()) { 8283 const Type *ElementType = nullptr; 8284 if (CAT) 8285 ElementType = CAT->getElementType().getTypePtr(); 8286 else if (VAT) 8287 ElementType = VAT->getElementType().getTypePtr(); 8288 else 8289 assert(&Component == &*Components.begin() && 8290 "Only expect pointer (non CAT or VAT) when this is the " 8291 "first Component"); 8292 // If ElementType is null, then it means the base is a pointer 8293 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8294 // for next iteration. 8295 if (ElementType) { 8296 // For the case that having pointer as base, we need to remove one 8297 // level of indirection. 8298 if (&Component != &*Components.begin()) 8299 ElementType = ElementType->getPointeeOrArrayElementType(); 8300 ElementTypeSize = 8301 Context.getTypeSizeInChars(ElementType).getQuantity(); 8302 CurStrides.push_back( 8303 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8304 } 8305 } 8306 // Get dimension value except for the last dimension since we don't need 8307 // it. 8308 if (DimSizes.size() < Components.size() - 1) { 8309 if (CAT) 8310 DimSizes.push_back(llvm::ConstantInt::get( 8311 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8312 else if (VAT) 8313 DimSizes.push_back(CGF.Builder.CreateIntCast( 8314 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8315 /*IsSigned=*/false)); 8316 } 8317 } 8318 8319 // Skip the dummy dimension since we have already have its information. 8320 auto *DI = DimSizes.begin() + 1; 8321 // Product of dimension. 8322 llvm::Value *DimProd = 8323 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8324 8325 // Collect info for non-contiguous. Notice that offset, count, and stride 8326 // are only meaningful for array-section, so we insert a null for anything 8327 // other than array-section. 8328 // Also, the size of offset, count, and stride are not the same as 8329 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8330 // count, and stride are the same as the number of non-contiguous 8331 // declaration in target update to/from clause. 8332 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8333 Components) { 8334 const Expr *AssocExpr = Component.getAssociatedExpression(); 8335 8336 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8337 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8338 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8339 /*isSigned=*/false); 8340 CurOffsets.push_back(Offset); 8341 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8342 CurStrides.push_back(CurStrides.back()); 8343 continue; 8344 } 8345 8346 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8347 8348 if (!OASE) 8349 continue; 8350 8351 // Offset 8352 const Expr *OffsetExpr = OASE->getLowerBound(); 8353 llvm::Value *Offset = nullptr; 8354 if (!OffsetExpr) { 8355 // If offset is absent, then we just set it to zero. 8356 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8357 } else { 8358 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8359 CGF.Int64Ty, 8360 /*isSigned=*/false); 8361 } 8362 CurOffsets.push_back(Offset); 8363 8364 // Count 8365 const Expr *CountExpr = OASE->getLength(); 8366 llvm::Value *Count = nullptr; 8367 if (!CountExpr) { 8368 // In Clang, once a high dimension is an array section, we construct all 8369 // the lower dimension as array section, however, for case like 8370 // arr[0:2][2], Clang construct the inner dimension as an array section 8371 // but it actually is not in an array section form according to spec. 8372 if (!OASE->getColonLocFirst().isValid() && 8373 !OASE->getColonLocSecond().isValid()) { 8374 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8375 } else { 8376 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8377 // When the length is absent it defaults to ⌈(size − 8378 // lower-bound)/stride⌉, where size is the size of the array 8379 // dimension. 8380 const Expr *StrideExpr = OASE->getStride(); 8381 llvm::Value *Stride = 8382 StrideExpr 8383 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8384 CGF.Int64Ty, /*isSigned=*/false) 8385 : nullptr; 8386 if (Stride) 8387 Count = CGF.Builder.CreateUDiv( 8388 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8389 else 8390 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8391 } 8392 } else { 8393 Count = CGF.EmitScalarExpr(CountExpr); 8394 } 8395 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8396 CurCounts.push_back(Count); 8397 8398 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8399 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8400 // Offset Count Stride 8401 // D0 0 1 4 (int) <- dummy dimension 8402 // D1 0 2 8 (2 * (1) * 4) 8403 // D2 1 2 20 (1 * (1 * 5) * 4) 8404 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8405 const Expr *StrideExpr = OASE->getStride(); 8406 llvm::Value *Stride = 8407 StrideExpr 8408 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8409 CGF.Int64Ty, /*isSigned=*/false) 8410 : nullptr; 8411 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8412 if (Stride) 8413 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8414 else 8415 CurStrides.push_back(DimProd); 8416 if (DI != DimSizes.end()) 8417 ++DI; 8418 } 8419 8420 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8421 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8422 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8423 } 8424 8425 /// Return the adjusted map modifiers if the declaration a capture refers to 8426 /// appears in a first-private clause. This is expected to be used only with 8427 /// directives that start with 'target'. 
8428 MappableExprsHandler::OpenMPOffloadMappingFlags 8429 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8430 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8431 8432 // A first private variable captured by reference will use only the 8433 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8434 // declaration is known as first-private in this handler. 8435 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8436 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8437 return MappableExprsHandler::OMP_MAP_TO | 8438 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8439 return MappableExprsHandler::OMP_MAP_PRIVATE | 8440 MappableExprsHandler::OMP_MAP_TO; 8441 } 8442 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8443 if (I != LambdasMap.end()) 8444 // for map(to: lambda): using user specified map type. 8445 return getMapTypeBits( 8446 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8447 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8448 /*AddPtrFlag=*/false, 8449 /*AddIsTargetParamFlag=*/false, 8450 /*isNonContiguous=*/false); 8451 return MappableExprsHandler::OMP_MAP_TO | 8452 MappableExprsHandler::OMP_MAP_FROM; 8453 } 8454 8455 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8456 // Rotate by getFlagMemberOffset() bits. 8457 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8458 << getFlagMemberOffset()); 8459 } 8460 8461 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8462 OpenMPOffloadMappingFlags MemberOfFlag) { 8463 // If the entry is PTR_AND_OBJ but has not been marked with the special 8464 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8465 // marked as MEMBER_OF. 
8466 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8467 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8468 return; 8469 8470 // Reset the placeholder value to prepare the flag for the assignment of the 8471 // proper MEMBER_OF value. 8472 Flags &= ~OMP_MAP_MEMBER_OF; 8473 Flags |= MemberOfFlag; 8474 } 8475 8476 void getPlainLayout(const CXXRecordDecl *RD, 8477 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8478 bool AsBase) const { 8479 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8480 8481 llvm::StructType *St = 8482 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8483 8484 unsigned NumElements = St->getNumElements(); 8485 llvm::SmallVector< 8486 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8487 RecordLayout(NumElements); 8488 8489 // Fill bases. 8490 for (const auto &I : RD->bases()) { 8491 if (I.isVirtual()) 8492 continue; 8493 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8494 // Ignore empty bases. 8495 if (Base->isEmpty() || CGF.getContext() 8496 .getASTRecordLayout(Base) 8497 .getNonVirtualSize() 8498 .isZero()) 8499 continue; 8500 8501 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8502 RecordLayout[FieldIndex] = Base; 8503 } 8504 // Fill in virtual bases. 8505 for (const auto &I : RD->vbases()) { 8506 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8507 // Ignore empty bases. 8508 if (Base->isEmpty()) 8509 continue; 8510 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8511 if (RecordLayout[FieldIndex]) 8512 continue; 8513 RecordLayout[FieldIndex] = Base; 8514 } 8515 // Fill in all the fields. 8516 assert(!RD->isUnion() && "Unexpected union."); 8517 for (const auto *Field : RD->fields()) { 8518 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8519 // will fill in later.) 
8520 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8521 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8522 RecordLayout[FieldIndex] = Field; 8523 } 8524 } 8525 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8526 &Data : RecordLayout) { 8527 if (Data.isNull()) 8528 continue; 8529 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8530 getPlainLayout(Base, Layout, /*AsBase=*/true); 8531 else 8532 Layout.push_back(Data.get<const FieldDecl *>()); 8533 } 8534 } 8535 8536 /// Generate all the base pointers, section pointers, sizes, map types, and 8537 /// mappers for the extracted mappable expressions (all included in \a 8538 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8539 /// pair of the relevant declaration and index where it occurs is appended to 8540 /// the device pointers info array. 8541 void generateAllInfoForClauses( 8542 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8543 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8544 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8545 // We have to process the component lists that relate with the same 8546 // declaration in a single chunk so that we can generate the map flags 8547 // correctly. Therefore, we organize all lists in a map. 8548 enum MapKind { Present, Allocs, Other, Total }; 8549 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8550 SmallVector<SmallVector<MapInfo, 8>, 4>> 8551 Info; 8552 8553 // Helper function to fill the information map for the different supported 8554 // clauses. 
8555 auto &&InfoGen = 8556 [&Info, &SkipVarSet]( 8557 const ValueDecl *D, MapKind Kind, 8558 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8559 OpenMPMapClauseKind MapType, 8560 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8561 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8562 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8563 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8564 if (SkipVarSet.contains(D)) 8565 return; 8566 auto It = Info.find(D); 8567 if (It == Info.end()) 8568 It = Info 8569 .insert(std::make_pair( 8570 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8571 .first; 8572 It->second[Kind].emplace_back( 8573 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8574 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8575 }; 8576 8577 for (const auto *Cl : Clauses) { 8578 const auto *C = dyn_cast<OMPMapClause>(Cl); 8579 if (!C) 8580 continue; 8581 MapKind Kind = Other; 8582 if (llvm::is_contained(C->getMapTypeModifiers(), 8583 OMPC_MAP_MODIFIER_present)) 8584 Kind = Present; 8585 else if (C->getMapType() == OMPC_MAP_alloc) 8586 Kind = Allocs; 8587 const auto *EI = C->getVarRefs().begin(); 8588 for (const auto L : C->component_lists()) { 8589 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8590 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8591 C->getMapTypeModifiers(), llvm::None, 8592 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8593 E); 8594 ++EI; 8595 } 8596 } 8597 for (const auto *Cl : Clauses) { 8598 const auto *C = dyn_cast<OMPToClause>(Cl); 8599 if (!C) 8600 continue; 8601 MapKind Kind = Other; 8602 if (llvm::is_contained(C->getMotionModifiers(), 8603 OMPC_MOTION_MODIFIER_present)) 8604 Kind = Present; 8605 const auto *EI = C->getVarRefs().begin(); 8606 for (const auto L : C->component_lists()) { 8607 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8608 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8609 C->isImplicit(), std::get<2>(L), *EI); 8610 ++EI; 8611 } 8612 } 8613 for (const auto *Cl : Clauses) { 8614 const auto *C = dyn_cast<OMPFromClause>(Cl); 8615 if (!C) 8616 continue; 8617 MapKind Kind = Other; 8618 if (llvm::is_contained(C->getMotionModifiers(), 8619 OMPC_MOTION_MODIFIER_present)) 8620 Kind = Present; 8621 const auto *EI = C->getVarRefs().begin(); 8622 for (const auto L : C->component_lists()) { 8623 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8624 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8625 C->isImplicit(), std::get<2>(L), *EI); 8626 ++EI; 8627 } 8628 } 8629 8630 // Look at the use_device_ptr clause information and mark the existing map 8631 // entries as such. If there is no map information for an entry in the 8632 // use_device_ptr list, we create one with map type 'alloc' and zero size 8633 // section. It is the user fault if that was not mapped before. If there is 8634 // no map information and the pointer is a struct member, then we defer the 8635 // emission of that entry until the whole struct has been processed. 
8636 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8637 SmallVector<DeferredDevicePtrEntryTy, 4>> 8638 DeferredInfo; 8639 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8640 8641 for (const auto *Cl : Clauses) { 8642 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8643 if (!C) 8644 continue; 8645 for (const auto L : C->component_lists()) { 8646 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8647 std::get<1>(L); 8648 assert(!Components.empty() && 8649 "Not expecting empty list of components!"); 8650 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8651 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8652 const Expr *IE = Components.back().getAssociatedExpression(); 8653 // If the first component is a member expression, we have to look into 8654 // 'this', which maps to null in the map of map information. Otherwise 8655 // look directly for the information. 8656 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8657 8658 // We potentially have map information for this declaration already. 8659 // Look for the first set of components that refer to it. 8660 if (It != Info.end()) { 8661 bool Found = false; 8662 for (auto &Data : It->second) { 8663 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8664 return MI.Components.back().getAssociatedDeclaration() == VD; 8665 }); 8666 // If we found a map entry, signal that the pointer has to be 8667 // returned and move on to the next declaration. Exclude cases where 8668 // the base pointer is mapped as array subscript, array section or 8669 // array shaping. The base address is passed as a pointer to base in 8670 // this case and cannot be used as a base for use_device_ptr list 8671 // item. 
8672 if (CI != Data.end()) { 8673 auto PrevCI = std::next(CI->Components.rbegin()); 8674 const auto *VarD = dyn_cast<VarDecl>(VD); 8675 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8676 isa<MemberExpr>(IE) || 8677 !VD->getType().getNonReferenceType()->isPointerType() || 8678 PrevCI == CI->Components.rend() || 8679 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8680 VarD->hasLocalStorage()) { 8681 CI->ReturnDevicePointer = true; 8682 Found = true; 8683 break; 8684 } 8685 } 8686 } 8687 if (Found) 8688 continue; 8689 } 8690 8691 // We didn't find any match in our map information - generate a zero 8692 // size array section - if the pointer is a struct member we defer this 8693 // action until the whole struct has been processed. 8694 if (isa<MemberExpr>(IE)) { 8695 // Insert the pointer into Info to be processed by 8696 // generateInfoForComponentList. Because it is a member pointer 8697 // without a pointee, no entry will be generated for it, therefore 8698 // we need to generate one after the whole struct has been processed. 8699 // Nonetheless, generateInfoForComponentList must be called to take 8700 // the pointer into account for the calculation of the range of the 8701 // partial struct. 
8702 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8703 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8704 nullptr); 8705 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8706 } else { 8707 llvm::Value *Ptr = 8708 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8709 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8710 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8711 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8712 UseDevicePtrCombinedInfo.Sizes.push_back( 8713 llvm::Constant::getNullValue(CGF.Int64Ty)); 8714 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8715 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8716 } 8717 } 8718 } 8719 8720 // Look at the use_device_addr clause information and mark the existing map 8721 // entries as such. If there is no map information for an entry in the 8722 // use_device_addr list, we create one with map type 'alloc' and zero size 8723 // section. It is the user fault if that was not mapped before. If there is 8724 // no map information and the pointer is a struct member, then we defer the 8725 // emission of that entry until the whole struct has been processed. 8726 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8727 for (const auto *Cl : Clauses) { 8728 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8729 if (!C) 8730 continue; 8731 for (const auto L : C->component_lists()) { 8732 assert(!std::get<1>(L).empty() && 8733 "Not expecting empty list of components!"); 8734 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8735 if (!Processed.insert(VD).second) 8736 continue; 8737 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8738 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8739 // If the first component is a member expression, we have to look into 8740 // 'this', which maps to null in the map of map information. 
Otherwise 8741 // look directly for the information. 8742 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8743 8744 // We potentially have map information for this declaration already. 8745 // Look for the first set of components that refer to it. 8746 if (It != Info.end()) { 8747 bool Found = false; 8748 for (auto &Data : It->second) { 8749 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8750 return MI.Components.back().getAssociatedDeclaration() == VD; 8751 }); 8752 // If we found a map entry, signal that the pointer has to be 8753 // returned and move on to the next declaration. 8754 if (CI != Data.end()) { 8755 CI->ReturnDevicePointer = true; 8756 Found = true; 8757 break; 8758 } 8759 } 8760 if (Found) 8761 continue; 8762 } 8763 8764 // We didn't find any match in our map information - generate a zero 8765 // size array section - if the pointer is a struct member we defer this 8766 // action until the whole struct has been processed. 8767 if (isa<MemberExpr>(IE)) { 8768 // Insert the pointer into Info to be processed by 8769 // generateInfoForComponentList. Because it is a member pointer 8770 // without a pointee, no entry will be generated for it, therefore 8771 // we need to generate one after the whole struct has been processed. 8772 // Nonetheless, generateInfoForComponentList must be called to take 8773 // the pointer into account for the calculation of the range of the 8774 // partial struct. 
8775 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8776 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8777 nullptr, nullptr, /*ForDeviceAddr=*/true); 8778 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8779 } else { 8780 llvm::Value *Ptr; 8781 if (IE->isGLValue()) 8782 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8783 else 8784 Ptr = CGF.EmitScalarExpr(IE); 8785 CombinedInfo.Exprs.push_back(VD); 8786 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8787 CombinedInfo.Pointers.push_back(Ptr); 8788 CombinedInfo.Sizes.push_back( 8789 llvm::Constant::getNullValue(CGF.Int64Ty)); 8790 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8791 CombinedInfo.Mappers.push_back(nullptr); 8792 } 8793 } 8794 } 8795 8796 for (const auto &Data : Info) { 8797 StructRangeInfoTy PartialStruct; 8798 // Temporary generated information. 8799 MapCombinedInfoTy CurInfo; 8800 const Decl *D = Data.first; 8801 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8802 for (const auto &M : Data.second) { 8803 for (const MapInfo &L : M) { 8804 assert(!L.Components.empty() && 8805 "Not expecting declaration with no component lists."); 8806 8807 // Remember the current base pointer index. 8808 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8809 CurInfo.NonContigInfo.IsNonContiguous = 8810 L.Components.back().isNonContiguous(); 8811 generateInfoForComponentList( 8812 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8813 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8814 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8815 8816 // If this entry relates with a device pointer, set the relevant 8817 // declaration and add the 'return pointer' flag. 
8818 if (L.ReturnDevicePointer) { 8819 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8820 "Unexpected number of mapped base pointers."); 8821 8822 const ValueDecl *RelevantVD = 8823 L.Components.back().getAssociatedDeclaration(); 8824 assert(RelevantVD && 8825 "No relevant declaration related with device pointer??"); 8826 8827 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8828 RelevantVD); 8829 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8830 } 8831 } 8832 } 8833 8834 // Append any pending zero-length pointers which are struct members and 8835 // used with use_device_ptr or use_device_addr. 8836 auto CI = DeferredInfo.find(Data.first); 8837 if (CI != DeferredInfo.end()) { 8838 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8839 llvm::Value *BasePtr; 8840 llvm::Value *Ptr; 8841 if (L.ForDeviceAddr) { 8842 if (L.IE->isGLValue()) 8843 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8844 else 8845 Ptr = this->CGF.EmitScalarExpr(L.IE); 8846 BasePtr = Ptr; 8847 // Entry is RETURN_PARAM. Also, set the placeholder value 8848 // MEMBER_OF=FFFF so that the entry is later updated with the 8849 // correct value of MEMBER_OF. 8850 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8851 } else { 8852 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8853 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8854 L.IE->getExprLoc()); 8855 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8856 // placeholder value MEMBER_OF=FFFF so that the entry is later 8857 // updated with the correct value of MEMBER_OF. 
8858 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8859 OMP_MAP_MEMBER_OF); 8860 } 8861 CurInfo.Exprs.push_back(L.VD); 8862 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8863 CurInfo.Pointers.push_back(Ptr); 8864 CurInfo.Sizes.push_back( 8865 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8866 CurInfo.Mappers.push_back(nullptr); 8867 } 8868 } 8869 // If there is an entry in PartialStruct it means we have a struct with 8870 // individual members mapped. Emit an extra combined entry. 8871 if (PartialStruct.Base.isValid()) { 8872 CurInfo.NonContigInfo.Dims.push_back(0); 8873 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8874 } 8875 8876 // We need to append the results of this capture to what we already 8877 // have. 8878 CombinedInfo.append(CurInfo); 8879 } 8880 // Append data for use_device_ptr clauses. 8881 CombinedInfo.append(UseDevicePtrCombinedInfo); 8882 } 8883 8884 public: 8885 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8886 : CurDir(&Dir), CGF(CGF) { 8887 // Extract firstprivate clause information. 8888 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8889 for (const auto *D : C->varlists()) 8890 FirstPrivateDecls.try_emplace( 8891 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8892 // Extract implicit firstprivates from uses_allocators clauses. 
8893 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8894 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8895 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8896 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8897 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8898 /*Implicit=*/true); 8899 else if (const auto *VD = dyn_cast<VarDecl>( 8900 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8901 ->getDecl())) 8902 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8903 } 8904 } 8905 // Extract device pointer clause information. 8906 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8907 for (auto L : C->component_lists()) 8908 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8909 // Extract map information. 8910 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8911 if (C->getMapType() != OMPC_MAP_to) 8912 continue; 8913 for (auto L : C->component_lists()) { 8914 const ValueDecl *VD = std::get<0>(L); 8915 const auto *RD = VD ? VD->getType() 8916 .getCanonicalType() 8917 .getNonReferenceType() 8918 ->getAsCXXRecordDecl() 8919 : nullptr; 8920 if (RD && RD->isLambda()) 8921 LambdasMap.try_emplace(std::get<0>(L), C); 8922 } 8923 } 8924 } 8925 8926 /// Constructor for the declare mapper directive. 8927 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8928 : CurDir(&Dir), CGF(CGF) {} 8929 8930 /// Generate code for the combined entry if we have a partially mapped struct 8931 /// and take care of the mapping flags of the arguments corresponding to 8932 /// individual struct members. 
8933 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8934 MapFlagsArrayTy &CurTypes, 8935 const StructRangeInfoTy &PartialStruct, 8936 const ValueDecl *VD = nullptr, 8937 bool NotTargetParams = true) const { 8938 if (CurTypes.size() == 1 && 8939 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8940 !PartialStruct.IsArraySection) 8941 return; 8942 Address LBAddr = PartialStruct.LowestElem.second; 8943 Address HBAddr = PartialStruct.HighestElem.second; 8944 if (PartialStruct.HasCompleteRecord) { 8945 LBAddr = PartialStruct.LB; 8946 HBAddr = PartialStruct.LB; 8947 } 8948 CombinedInfo.Exprs.push_back(VD); 8949 // Base is the base of the struct 8950 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8951 // Pointer is the address of the lowest element 8952 llvm::Value *LB = LBAddr.getPointer(); 8953 CombinedInfo.Pointers.push_back(LB); 8954 // There should not be a mapper for a combined entry. 8955 CombinedInfo.Mappers.push_back(nullptr); 8956 // Size is (addr of {highest+1} element) - (addr of lowest element) 8957 llvm::Value *HB = HBAddr.getPointer(); 8958 llvm::Value *HAddr = 8959 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8960 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8961 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8962 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8963 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8964 /*isSigned=*/false); 8965 CombinedInfo.Sizes.push_back(Size); 8966 // Map type is always TARGET_PARAM, if generate info for captures. 8967 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8968 : OMP_MAP_TARGET_PARAM); 8969 // If any element has the present modifier, then make sure the runtime 8970 // doesn't attempt to allocate the struct. 
8971 if (CurTypes.end() != 8972 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8973 return Type & OMP_MAP_PRESENT; 8974 })) 8975 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8976 // Remove TARGET_PARAM flag from the first element 8977 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8978 // If any element has the ompx_hold modifier, then make sure the runtime 8979 // uses the hold reference count for the struct as a whole so that it won't 8980 // be unmapped by an extra dynamic reference count decrement. Add it to all 8981 // elements as well so the runtime knows which reference count to check 8982 // when determining whether it's time for device-to-host transfers of 8983 // individual elements. 8984 if (CurTypes.end() != 8985 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8986 return Type & OMP_MAP_OMPX_HOLD; 8987 })) { 8988 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 8989 for (auto &M : CurTypes) 8990 M |= OMP_MAP_OMPX_HOLD; 8991 } 8992 8993 // All other current entries will be MEMBER_OF the combined entry 8994 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8995 // 0xFFFF in the MEMBER_OF field). 8996 OpenMPOffloadMappingFlags MemberOfFlag = 8997 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8998 for (auto &M : CurTypes) 8999 setCorrectMemberOfFlag(M, MemberOfFlag); 9000 } 9001 9002 /// Generate all the base pointers, section pointers, sizes, map types, and 9003 /// mappers for the extracted mappable expressions (all included in \a 9004 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9005 /// pair of the relevant declaration and index where it occurs is appended to 9006 /// the device pointers info array. 
9007 void generateAllInfo( 9008 MapCombinedInfoTy &CombinedInfo, 9009 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9010 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9011 assert(CurDir.is<const OMPExecutableDirective *>() && 9012 "Expect a executable directive"); 9013 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9014 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9015 } 9016 9017 /// Generate all the base pointers, section pointers, sizes, map types, and 9018 /// mappers for the extracted map clauses of user-defined mapper (all included 9019 /// in \a CombinedInfo). 9020 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9021 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9022 "Expect a declare mapper directive"); 9023 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9024 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9025 } 9026 9027 /// Emit capture info for lambdas for variables captured by reference. 
9028 void generateInfoForLambdaCaptures( 9029 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 9030 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 9031 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); 9032 const auto *RD = VDType->getAsCXXRecordDecl(); 9033 if (!RD || !RD->isLambda()) 9034 return; 9035 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), 9036 CGF.getContext().getDeclAlign(VD)); 9037 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); 9038 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 9039 FieldDecl *ThisCapture = nullptr; 9040 RD->getCaptureFields(Captures, ThisCapture); 9041 if (ThisCapture) { 9042 LValue ThisLVal = 9043 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 9044 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 9045 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 9046 VDLVal.getPointer(CGF)); 9047 CombinedInfo.Exprs.push_back(VD); 9048 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 9049 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 9050 CombinedInfo.Sizes.push_back( 9051 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 9052 CGF.Int64Ty, /*isSigned=*/true)); 9053 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9054 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9055 CombinedInfo.Mappers.push_back(nullptr); 9056 } 9057 for (const LambdaCapture &LC : RD->captures()) { 9058 if (!LC.capturesVariable()) 9059 continue; 9060 const VarDecl *VD = LC.getCapturedVar(); 9061 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 9062 continue; 9063 auto It = Captures.find(VD); 9064 assert(It != Captures.end() && "Found lambda capture without field."); 9065 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 9066 if (LC.getCaptureKind() == LCK_ByRef) { 9067 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 9068 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9069 VDLVal.getPointer(CGF)); 9070 CombinedInfo.Exprs.push_back(VD); 9071 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 9072 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 9073 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9074 CGF.getTypeSize( 9075 VD->getType().getCanonicalType().getNonReferenceType()), 9076 CGF.Int64Ty, /*isSigned=*/true)); 9077 } else { 9078 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 9079 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9080 VDLVal.getPointer(CGF)); 9081 CombinedInfo.Exprs.push_back(VD); 9082 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 9083 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 9084 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9085 } 9086 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9087 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9088 CombinedInfo.Mappers.push_back(nullptr); 9089 } 9090 } 9091 9092 /// Set correct indices for lambdas captures. 9093 void adjustMemberOfForLambdaCaptures( 9094 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 9095 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 9096 MapFlagsArrayTy &Types) const { 9097 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 9098 // Set correct member_of idx for all implicit lambda captures. 
9099 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9100 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 9101 continue; 9102 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 9103 assert(BasePtr && "Unable to find base lambda address."); 9104 int TgtIdx = -1; 9105 for (unsigned J = I; J > 0; --J) { 9106 unsigned Idx = J - 1; 9107 if (Pointers[Idx] != BasePtr) 9108 continue; 9109 TgtIdx = Idx; 9110 break; 9111 } 9112 assert(TgtIdx != -1 && "Unable to find parent lambda."); 9113 // All other current entries will be MEMBER_OF the combined entry 9114 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9115 // 0xFFFF in the MEMBER_OF field). 9116 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 9117 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 9118 } 9119 } 9120 9121 /// Generate the base pointers, section pointers, sizes, map types, and 9122 /// mappers associated to a given capture (all included in \a CombinedInfo). 9123 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 9124 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 9125 StructRangeInfoTy &PartialStruct) const { 9126 assert(!Cap->capturesVariableArrayType() && 9127 "Not expecting to generate map info for a variable array type!"); 9128 9129 // We need to know when we generating information for the first component 9130 const ValueDecl *VD = Cap->capturesThis() 9131 ? nullptr 9132 : Cap->getCapturedVar()->getCanonicalDecl(); 9133 9134 // for map(to: lambda): skip here, processing it in 9135 // generateDefaultMapInfo 9136 if (LambdasMap.count(VD)) 9137 return; 9138 9139 // If this declaration appears in a is_device_ptr clause we just have to 9140 // pass the pointer by value. If it is a reference to a declaration, we just 9141 // pass its value. 
9142 if (DevPointersMap.count(VD)) { 9143 CombinedInfo.Exprs.push_back(VD); 9144 CombinedInfo.BasePointers.emplace_back(Arg, VD); 9145 CombinedInfo.Pointers.push_back(Arg); 9146 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9147 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 9148 /*isSigned=*/true)); 9149 CombinedInfo.Types.push_back( 9150 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 9151 OMP_MAP_TARGET_PARAM); 9152 CombinedInfo.Mappers.push_back(nullptr); 9153 return; 9154 } 9155 9156 using MapData = 9157 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 9158 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 9159 const ValueDecl *, const Expr *>; 9160 SmallVector<MapData, 4> DeclComponentLists; 9161 assert(CurDir.is<const OMPExecutableDirective *>() && 9162 "Expect a executable directive"); 9163 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9164 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 9165 const auto *EI = C->getVarRefs().begin(); 9166 for (const auto L : C->decl_component_lists(VD)) { 9167 const ValueDecl *VDecl, *Mapper; 9168 // The Expression is not correct if the mapping is implicit 9169 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 9170 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9171 std::tie(VDecl, Components, Mapper) = L; 9172 assert(VDecl == VD && "We got information for the wrong declaration??"); 9173 assert(!Components.empty() && 9174 "Not expecting declaration with no component lists."); 9175 DeclComponentLists.emplace_back(Components, C->getMapType(), 9176 C->getMapTypeModifiers(), 9177 C->isImplicit(), Mapper, E); 9178 ++EI; 9179 } 9180 } 9181 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 9182 const MapData &RHS) { 9183 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 9184 OpenMPMapClauseKind MapType = std::get<1>(RHS); 9185 bool HasPresent = 9186 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9187 bool HasAllocs = MapType == OMPC_MAP_alloc; 9188 MapModifiers = std::get<2>(RHS); 9189 MapType = std::get<1>(LHS); 9190 bool HasPresentR = 9191 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9192 bool HasAllocsR = MapType == OMPC_MAP_alloc; 9193 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 9194 }); 9195 9196 // Find overlapping elements (including the offset from the base element). 
9197 llvm::SmallDenseMap< 9198 const MapData *, 9199 llvm::SmallVector< 9200 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 9201 4> 9202 OverlappedData; 9203 size_t Count = 0; 9204 for (const MapData &L : DeclComponentLists) { 9205 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9206 OpenMPMapClauseKind MapType; 9207 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9208 bool IsImplicit; 9209 const ValueDecl *Mapper; 9210 const Expr *VarRef; 9211 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9212 L; 9213 ++Count; 9214 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 9215 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 9216 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 9217 VarRef) = L1; 9218 auto CI = Components.rbegin(); 9219 auto CE = Components.rend(); 9220 auto SI = Components1.rbegin(); 9221 auto SE = Components1.rend(); 9222 for (; CI != CE && SI != SE; ++CI, ++SI) { 9223 if (CI->getAssociatedExpression()->getStmtClass() != 9224 SI->getAssociatedExpression()->getStmtClass()) 9225 break; 9226 // Are we dealing with different variables/fields? 9227 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 9228 break; 9229 } 9230 // Found overlapping if, at least for one component, reached the head 9231 // of the components list. 9232 if (CI == CE || SI == SE) { 9233 // Ignore it if it is the same component. 9234 if (CI == CE && SI == SE) 9235 continue; 9236 const auto It = (SI == SE) ? CI : SI; 9237 // If one component is a pointer and another one is a kind of 9238 // dereference of this pointer (array subscript, section, dereference, 9239 // etc.), it is not an overlapping. 9240 // Same, if one component is a base and another component is a 9241 // dereferenced pointer memberexpr with the same base. 
9242 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 9243 (std::prev(It)->getAssociatedDeclaration() && 9244 std::prev(It) 9245 ->getAssociatedDeclaration() 9246 ->getType() 9247 ->isPointerType()) || 9248 (It->getAssociatedDeclaration() && 9249 It->getAssociatedDeclaration()->getType()->isPointerType() && 9250 std::next(It) != CE && std::next(It) != SE)) 9251 continue; 9252 const MapData &BaseData = CI == CE ? L : L1; 9253 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 9254 SI == SE ? Components : Components1; 9255 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 9256 OverlappedElements.getSecond().push_back(SubData); 9257 } 9258 } 9259 } 9260 // Sort the overlapped elements for each item. 9261 llvm::SmallVector<const FieldDecl *, 4> Layout; 9262 if (!OverlappedData.empty()) { 9263 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); 9264 const Type *OrigType = BaseType->getPointeeOrArrayElementType(); 9265 while (BaseType != OrigType) { 9266 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); 9267 OrigType = BaseType->getPointeeOrArrayElementType(); 9268 } 9269 9270 if (const auto *CRD = BaseType->getAsCXXRecordDecl()) 9271 getPlainLayout(CRD, Layout, /*AsBase=*/false); 9272 else { 9273 const auto *RD = BaseType->getAsRecordDecl(); 9274 Layout.append(RD->field_begin(), RD->field_end()); 9275 } 9276 } 9277 for (auto &Pair : OverlappedData) { 9278 llvm::stable_sort( 9279 Pair.getSecond(), 9280 [&Layout]( 9281 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 9282 OMPClauseMappableExprCommon::MappableExprComponentListRef 9283 Second) { 9284 auto CI = First.rbegin(); 9285 auto CE = First.rend(); 9286 auto SI = Second.rbegin(); 9287 auto SE = Second.rend(); 9288 for (; CI != CE && SI != SE; ++CI, ++SI) { 9289 if (CI->getAssociatedExpression()->getStmtClass() != 9290 SI->getAssociatedExpression()->getStmtClass()) 9291 break; 9292 // Are we dealing with different 
variables/fields? 9293 if (CI->getAssociatedDeclaration() != 9294 SI->getAssociatedDeclaration()) 9295 break; 9296 } 9297 9298 // Lists contain the same elements. 9299 if (CI == CE && SI == SE) 9300 return false; 9301 9302 // List with less elements is less than list with more elements. 9303 if (CI == CE || SI == SE) 9304 return CI == CE; 9305 9306 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 9307 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 9308 if (FD1->getParent() == FD2->getParent()) 9309 return FD1->getFieldIndex() < FD2->getFieldIndex(); 9310 const auto *It = 9311 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 9312 return FD == FD1 || FD == FD2; 9313 }); 9314 return *It == FD1; 9315 }); 9316 } 9317 9318 // Associated with a capture, because the mapping flags depend on it. 9319 // Go through all of the elements with the overlapped elements. 9320 bool IsFirstComponentList = true; 9321 for (const auto &Pair : OverlappedData) { 9322 const MapData &L = *Pair.getFirst(); 9323 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9324 OpenMPMapClauseKind MapType; 9325 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9326 bool IsImplicit; 9327 const ValueDecl *Mapper; 9328 const Expr *VarRef; 9329 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9330 L; 9331 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9332 OverlappedComponents = Pair.getSecond(); 9333 generateInfoForComponentList( 9334 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9335 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9336 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9337 IsFirstComponentList = false; 9338 } 9339 // Go through other elements without overlapped elements. 
9340 for (const MapData &L : DeclComponentLists) { 9341 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9342 OpenMPMapClauseKind MapType; 9343 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9344 bool IsImplicit; 9345 const ValueDecl *Mapper; 9346 const Expr *VarRef; 9347 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9348 L; 9349 auto It = OverlappedData.find(&L); 9350 if (It == OverlappedData.end()) 9351 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9352 Components, CombinedInfo, PartialStruct, 9353 IsFirstComponentList, IsImplicit, Mapper, 9354 /*ForDeviceAddr=*/false, VD, VarRef); 9355 IsFirstComponentList = false; 9356 } 9357 } 9358 9359 /// Generate the default map information for a given capture \a CI, 9360 /// record field declaration \a RI and captured value \a CV. 9361 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9362 const FieldDecl &RI, llvm::Value *CV, 9363 MapCombinedInfoTy &CombinedInfo) const { 9364 bool IsImplicit = true; 9365 // Do the default mapping. 9366 if (CI.capturesThis()) { 9367 CombinedInfo.Exprs.push_back(nullptr); 9368 CombinedInfo.BasePointers.push_back(CV); 9369 CombinedInfo.Pointers.push_back(CV); 9370 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9371 CombinedInfo.Sizes.push_back( 9372 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9373 CGF.Int64Ty, /*isSigned=*/true)); 9374 // Default map type. 9375 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9376 } else if (CI.capturesVariableByCopy()) { 9377 const VarDecl *VD = CI.getCapturedVar(); 9378 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9379 CombinedInfo.BasePointers.push_back(CV); 9380 CombinedInfo.Pointers.push_back(CV); 9381 if (!RI.getType()->isAnyPointerType()) { 9382 // We have to signal to the runtime captures passed by value that are 9383 // not pointers. 
9384 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9385 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9386 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9387 } else { 9388 // Pointers are implicitly mapped with a zero size and no flags 9389 // (other than first map that is added for all implicit maps). 9390 CombinedInfo.Types.push_back(OMP_MAP_NONE); 9391 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 9392 } 9393 auto I = FirstPrivateDecls.find(VD); 9394 if (I != FirstPrivateDecls.end()) 9395 IsImplicit = I->getSecond(); 9396 } else { 9397 assert(CI.capturesVariable() && "Expected captured reference."); 9398 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 9399 QualType ElementType = PtrTy->getPointeeType(); 9400 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9401 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 9402 // The default map type for a scalar/complex type is 'to' because by 9403 // default the value doesn't have to be retrieved. For an aggregate 9404 // type, the default is 'tofrom'. 9405 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 9406 const VarDecl *VD = CI.getCapturedVar(); 9407 auto I = FirstPrivateDecls.find(VD); 9408 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9409 CombinedInfo.BasePointers.push_back(CV); 9410 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9411 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9412 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9413 AlignmentSource::Decl)); 9414 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 9415 } else { 9416 CombinedInfo.Pointers.push_back(CV); 9417 } 9418 if (I != FirstPrivateDecls.end()) 9419 IsImplicit = I->getSecond(); 9420 } 9421 // Every default map produces a single argument which is a target parameter. 
9422 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 9423 9424 // Add flag stating this is an implicit map. 9425 if (IsImplicit) 9426 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 9427 9428 // No user-defined mapper for default mapping. 9429 CombinedInfo.Mappers.push_back(nullptr); 9430 } 9431 }; 9432 } // anonymous namespace 9433 9434 static void emitNonContiguousDescriptor( 9435 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9436 CGOpenMPRuntime::TargetDataInfo &Info) { 9437 CodeGenModule &CGM = CGF.CGM; 9438 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 9439 &NonContigInfo = CombinedInfo.NonContigInfo; 9440 9441 // Build an array of struct descriptor_dim and then assign it to 9442 // offload_args. 9443 // 9444 // struct descriptor_dim { 9445 // uint64_t offset; 9446 // uint64_t count; 9447 // uint64_t stride 9448 // }; 9449 ASTContext &C = CGF.getContext(); 9450 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9451 RecordDecl *RD; 9452 RD = C.buildImplicitRecord("descriptor_dim"); 9453 RD->startDefinition(); 9454 addFieldToRecordDecl(C, RD, Int64Ty); 9455 addFieldToRecordDecl(C, RD, Int64Ty); 9456 addFieldToRecordDecl(C, RD, Int64Ty); 9457 RD->completeDefinition(); 9458 QualType DimTy = C.getRecordType(RD); 9459 9460 enum { OffsetFD = 0, CountFD, StrideFD }; 9461 // We need two index variable here since the size of "Dims" is the same as the 9462 // size of Components, however, the size of offset, count, and stride is equal 9463 // to the size of base declaration that is non-contiguous. 9464 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9465 // Skip emitting ir if dimension size is 1 since it cannot be 9466 // non-contiguous. 
9467 if (NonContigInfo.Dims[I] == 1) 9468 continue; 9469 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9470 QualType ArrayTy = 9471 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9472 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9473 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9474 unsigned RevIdx = EE - II - 1; 9475 LValue DimsLVal = CGF.MakeAddrLValue( 9476 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9477 // Offset 9478 LValue OffsetLVal = CGF.EmitLValueForField( 9479 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9480 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9481 // Count 9482 LValue CountLVal = CGF.EmitLValueForField( 9483 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9484 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9485 // Stride 9486 LValue StrideLVal = CGF.EmitLValueForField( 9487 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9488 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9489 } 9490 // args[I] = &dims 9491 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9492 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9493 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9494 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9495 Info.PointersArray, 0, I); 9496 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign()); 9497 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9498 ++L; 9499 } 9500 } 9501 9502 // Try to extract the base declaration from a `this->x` expression if possible. 
9503 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9504 if (!E) 9505 return nullptr; 9506 9507 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9508 if (const MemberExpr *ME = 9509 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9510 return ME->getMemberDecl(); 9511 return nullptr; 9512 } 9513 9514 /// Emit a string constant containing the names of the values mapped to the 9515 /// offloading runtime library. 9516 llvm::Constant * 9517 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9518 MappableExprsHandler::MappingExprInfo &MapExprs) { 9519 9520 uint32_t SrcLocStrSize; 9521 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9522 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9523 9524 SourceLocation Loc; 9525 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9526 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9527 Loc = VD->getLocation(); 9528 else 9529 Loc = MapExprs.getMapExpr()->getExprLoc(); 9530 } else { 9531 Loc = MapExprs.getMapDecl()->getLocation(); 9532 } 9533 9534 std::string ExprName; 9535 if (MapExprs.getMapExpr()) { 9536 PrintingPolicy P(CGF.getContext().getLangOpts()); 9537 llvm::raw_string_ostream OS(ExprName); 9538 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9539 OS.flush(); 9540 } else { 9541 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9542 } 9543 9544 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9545 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9546 PLoc.getLine(), PLoc.getColumn(), 9547 SrcLocStrSize); 9548 } 9549 9550 /// Emit the arrays used to pass the captures and map information to the 9551 /// offloading runtime library. If there is no map or capture information, 9552 /// return nullptr by reference. 
9553 static void emitOffloadingArrays( 9554 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9555 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9556 bool IsNonContiguous = false) { 9557 CodeGenModule &CGM = CGF.CGM; 9558 ASTContext &Ctx = CGF.getContext(); 9559 9560 // Reset the array information. 9561 Info.clearArrayInfo(); 9562 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9563 9564 if (Info.NumberOfPtrs) { 9565 // Detect if we have any capture size requiring runtime evaluation of the 9566 // size so that a constant array could be eventually used. 9567 9568 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9569 QualType PointerArrayType = Ctx.getConstantArrayType( 9570 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9571 /*IndexTypeQuals=*/0); 9572 9573 Info.BasePointersArray = 9574 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9575 Info.PointersArray = 9576 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9577 Address MappersArray = 9578 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9579 Info.MappersArray = MappersArray.getPointer(); 9580 9581 // If we don't have any VLA types or other types that require runtime 9582 // evaluation, we can use a constant array for the map sizes, otherwise we 9583 // need to fill up the arrays as we do for the pointers. 
9584 QualType Int64Ty = 9585 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9586 SmallVector<llvm::Constant *> ConstSizes( 9587 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9588 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); 9589 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9590 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { 9591 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { 9592 if (IsNonContiguous && (CombinedInfo.Types[I] & 9593 MappableExprsHandler::OMP_MAP_NON_CONTIG)) 9594 ConstSizes[I] = llvm::ConstantInt::get( 9595 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); 9596 else 9597 ConstSizes[I] = CI; 9598 continue; 9599 } 9600 } 9601 RuntimeSizes.set(I); 9602 } 9603 9604 if (RuntimeSizes.all()) { 9605 QualType SizeArrayType = Ctx.getConstantArrayType( 9606 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9607 /*IndexTypeQuals=*/0); 9608 Info.SizesArray = 9609 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9610 } else { 9611 auto *SizesArrayInit = llvm::ConstantArray::get( 9612 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9613 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9614 auto *SizesArrayGbl = new llvm::GlobalVariable( 9615 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, 9616 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); 9617 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9618 if (RuntimeSizes.any()) { 9619 QualType SizeArrayType = Ctx.getConstantArrayType( 9620 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9621 /*IndexTypeQuals=*/0); 9622 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); 9623 llvm::Value *GblConstPtr = 9624 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9625 SizesArrayGbl, CGM.Int64Ty->getPointerTo()); 9626 CGF.Builder.CreateMemCpy( 9627 Buffer, 9628 Address(GblConstPtr, CGM.Int64Ty, 
9629 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( 9630 /*DestWidth=*/64, /*Signed=*/false))), 9631 CGF.getTypeSize(SizeArrayType)); 9632 Info.SizesArray = Buffer.getPointer(); 9633 } else { 9634 Info.SizesArray = SizesArrayGbl; 9635 } 9636 } 9637 9638 // The map types are always constant so we don't need to generate code to 9639 // fill arrays. Instead, we create an array constant. 9640 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9641 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9642 std::string MaptypesName = 9643 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9644 auto *MapTypesArrayGbl = 9645 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9646 Info.MapTypesArray = MapTypesArrayGbl; 9647 9648 // The information types are only built if there is debug information 9649 // requested. 9650 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9651 Info.MapNamesArray = llvm::Constant::getNullValue( 9652 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9653 } else { 9654 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9655 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9656 }; 9657 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9658 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9659 std::string MapnamesName = 9660 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9661 auto *MapNamesArrayGbl = 9662 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9663 Info.MapNamesArray = MapNamesArrayGbl; 9664 } 9665 9666 // If there's a present map type modifier, it must not be applied to the end 9667 // of a region, so generate a separate map type array in that case. 
9668 if (Info.separateBeginEndCalls()) { 9669 bool EndMapTypesDiffer = false; 9670 for (uint64_t &Type : Mapping) { 9671 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9672 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9673 EndMapTypesDiffer = true; 9674 } 9675 } 9676 if (EndMapTypesDiffer) { 9677 MapTypesArrayGbl = 9678 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9679 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9680 } 9681 } 9682 9683 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9684 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9685 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9686 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9687 Info.BasePointersArray, 0, I); 9688 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9689 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9690 Address BPAddr(BP, BPVal->getType(), 9691 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9692 CGF.Builder.CreateStore(BPVal, BPAddr); 9693 9694 if (Info.requiresDevicePointerInfo()) 9695 if (const ValueDecl *DevVD = 9696 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9697 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9698 9699 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9700 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9701 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9702 Info.PointersArray, 0, I); 9703 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9704 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9705 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9706 CGF.Builder.CreateStore(PVal, PAddr); 9707 9708 if (RuntimeSizes.test(I)) { 9709 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9710 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9711 Info.SizesArray, 9712 /*Idx0=*/0, 9713 /*Idx1=*/I); 9714 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty)); 9715 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9716 
CGM.Int64Ty, 9717 /*isSigned=*/true), 9718 SAddr); 9719 } 9720 9721 // Fill up the mapper array. 9722 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9723 if (CombinedInfo.Mappers[I]) { 9724 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9725 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9726 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9727 Info.HasMapper = true; 9728 } 9729 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9730 CGF.Builder.CreateStore(MFunc, MAddr); 9731 } 9732 } 9733 9734 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9735 Info.NumberOfPtrs == 0) 9736 return; 9737 9738 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9739 } 9740 9741 namespace { 9742 /// Additional arguments for emitOffloadingArraysArgument function. 9743 struct ArgumentsOptions { 9744 bool ForEndCall = false; 9745 ArgumentsOptions() = default; 9746 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9747 }; 9748 } // namespace 9749 9750 /// Emit the arguments to be passed to the runtime library based on the 9751 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9752 /// ForEndCall, emit map types to be passed for the end of the region instead of 9753 /// the beginning. 
9754 static void emitOffloadingArraysArgument( 9755 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9756 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9757 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9758 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9759 const ArgumentsOptions &Options = ArgumentsOptions()) { 9760 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9761 "expected region end call to runtime only when end call is separate"); 9762 CodeGenModule &CGM = CGF.CGM; 9763 if (Info.NumberOfPtrs) { 9764 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9765 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9766 Info.BasePointersArray, 9767 /*Idx0=*/0, /*Idx1=*/0); 9768 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9769 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9770 Info.PointersArray, 9771 /*Idx0=*/0, 9772 /*Idx1=*/0); 9773 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9774 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9775 /*Idx0=*/0, /*Idx1=*/0); 9776 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9777 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9778 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9779 : Info.MapTypesArray, 9780 /*Idx0=*/0, 9781 /*Idx1=*/0); 9782 9783 // Only emit the mapper information arrays if debug information is 9784 // requested. 
9785 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9786 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9787 else 9788 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9789 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9790 Info.MapNamesArray, 9791 /*Idx0=*/0, 9792 /*Idx1=*/0); 9793 // If there is no user-defined mapper, set the mapper array to nullptr to 9794 // avoid an unnecessary data privatization 9795 if (!Info.HasMapper) 9796 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9797 else 9798 MappersArrayArg = 9799 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9800 } else { 9801 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9802 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9803 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9804 MapTypesArrayArg = 9805 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9806 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9807 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9808 } 9809 } 9810 9811 /// Check for inner distribute directive. 
9812 static const OMPExecutableDirective * 9813 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9814 const auto *CS = D.getInnermostCapturedStmt(); 9815 const auto *Body = 9816 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9817 const Stmt *ChildStmt = 9818 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9819 9820 if (const auto *NestedDir = 9821 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9822 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9823 switch (D.getDirectiveKind()) { 9824 case OMPD_target: 9825 if (isOpenMPDistributeDirective(DKind)) 9826 return NestedDir; 9827 if (DKind == OMPD_teams) { 9828 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9829 /*IgnoreCaptured=*/true); 9830 if (!Body) 9831 return nullptr; 9832 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9833 if (const auto *NND = 9834 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9835 DKind = NND->getDirectiveKind(); 9836 if (isOpenMPDistributeDirective(DKind)) 9837 return NND; 9838 } 9839 } 9840 return nullptr; 9841 case OMPD_target_teams: 9842 if (isOpenMPDistributeDirective(DKind)) 9843 return NestedDir; 9844 return nullptr; 9845 case OMPD_target_parallel: 9846 case OMPD_target_simd: 9847 case OMPD_target_parallel_for: 9848 case OMPD_target_parallel_for_simd: 9849 return nullptr; 9850 case OMPD_target_teams_distribute: 9851 case OMPD_target_teams_distribute_simd: 9852 case OMPD_target_teams_distribute_parallel_for: 9853 case OMPD_target_teams_distribute_parallel_for_simd: 9854 case OMPD_parallel: 9855 case OMPD_for: 9856 case OMPD_parallel_for: 9857 case OMPD_parallel_master: 9858 case OMPD_parallel_sections: 9859 case OMPD_for_simd: 9860 case OMPD_parallel_for_simd: 9861 case OMPD_cancel: 9862 case OMPD_cancellation_point: 9863 case OMPD_ordered: 9864 case OMPD_threadprivate: 9865 case OMPD_allocate: 9866 case OMPD_task: 9867 case OMPD_simd: 9868 case OMPD_tile: 9869 
case OMPD_unroll: 9870 case OMPD_sections: 9871 case OMPD_section: 9872 case OMPD_single: 9873 case OMPD_master: 9874 case OMPD_critical: 9875 case OMPD_taskyield: 9876 case OMPD_barrier: 9877 case OMPD_taskwait: 9878 case OMPD_taskgroup: 9879 case OMPD_atomic: 9880 case OMPD_flush: 9881 case OMPD_depobj: 9882 case OMPD_scan: 9883 case OMPD_teams: 9884 case OMPD_target_data: 9885 case OMPD_target_exit_data: 9886 case OMPD_target_enter_data: 9887 case OMPD_distribute: 9888 case OMPD_distribute_simd: 9889 case OMPD_distribute_parallel_for: 9890 case OMPD_distribute_parallel_for_simd: 9891 case OMPD_teams_distribute: 9892 case OMPD_teams_distribute_simd: 9893 case OMPD_teams_distribute_parallel_for: 9894 case OMPD_teams_distribute_parallel_for_simd: 9895 case OMPD_target_update: 9896 case OMPD_declare_simd: 9897 case OMPD_declare_variant: 9898 case OMPD_begin_declare_variant: 9899 case OMPD_end_declare_variant: 9900 case OMPD_declare_target: 9901 case OMPD_end_declare_target: 9902 case OMPD_declare_reduction: 9903 case OMPD_declare_mapper: 9904 case OMPD_taskloop: 9905 case OMPD_taskloop_simd: 9906 case OMPD_master_taskloop: 9907 case OMPD_master_taskloop_simd: 9908 case OMPD_parallel_master_taskloop: 9909 case OMPD_parallel_master_taskloop_simd: 9910 case OMPD_requires: 9911 case OMPD_metadirective: 9912 case OMPD_unknown: 9913 default: 9914 llvm_unreachable("Unexpected directive."); 9915 } 9916 } 9917 9918 return nullptr; 9919 } 9920 9921 /// Emit the user-defined mapper function. The code generation follows the 9922 /// pattern in the example below. 9923 /// \code 9924 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9925 /// void *base, void *begin, 9926 /// int64_t size, int64_t type, 9927 /// void *name = nullptr) { 9928 /// // Allocate space for an array section first or add a base/begin for 9929 /// // pointer dereference. 
9930 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9931 /// !maptype.IsDelete) 9932 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9933 /// size*sizeof(Ty), clearToFromMember(type)); 9934 /// // Map members. 9935 /// for (unsigned i = 0; i < size; i++) { 9936 /// // For each component specified by this mapper: 9937 /// for (auto c : begin[i]->all_components) { 9938 /// if (c.hasMapper()) 9939 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9940 /// c.arg_type, c.arg_name); 9941 /// else 9942 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9943 /// c.arg_begin, c.arg_size, c.arg_type, 9944 /// c.arg_name); 9945 /// } 9946 /// } 9947 /// // Delete the array section. 9948 /// if (size > 1 && maptype.IsDelete) 9949 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9950 /// size*sizeof(Ty), clearToFromMember(type)); 9951 /// } 9952 /// \endcode 9953 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9954 CodeGenFunction *CGF) { 9955 if (UDMMap.count(D) > 0) 9956 return; 9957 ASTContext &C = CGM.getContext(); 9958 QualType Ty = D->getType(); 9959 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9960 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9961 auto *MapperVarDecl = 9962 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9963 SourceLocation Loc = D->getLocation(); 9964 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9965 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); 9966 9967 // Prepare mapper function arguments and attributes. 
9968 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9969 C.VoidPtrTy, ImplicitParamDecl::Other); 9970 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9971 ImplicitParamDecl::Other); 9972 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9973 C.VoidPtrTy, ImplicitParamDecl::Other); 9974 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9975 ImplicitParamDecl::Other); 9976 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9977 ImplicitParamDecl::Other); 9978 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9979 ImplicitParamDecl::Other); 9980 FunctionArgList Args; 9981 Args.push_back(&HandleArg); 9982 Args.push_back(&BaseArg); 9983 Args.push_back(&BeginArg); 9984 Args.push_back(&SizeArg); 9985 Args.push_back(&TypeArg); 9986 Args.push_back(&NameArg); 9987 const CGFunctionInfo &FnInfo = 9988 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9989 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9990 SmallString<64> TyStr; 9991 llvm::raw_svector_ostream Out(TyStr); 9992 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9993 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9994 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9995 Name, &CGM.getModule()); 9996 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9997 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9998 // Start the mapper function code generation. 9999 CodeGenFunction MapperCGF(CGM); 10000 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 10001 // Compute the starting and end addresses of array elements. 10002 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 10003 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 10004 C.getPointerType(Int64Ty), Loc); 10005 // Prepare common arguments for array initiation and deletion. 
10006 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 10007 MapperCGF.GetAddrOfLocalVar(&HandleArg), 10008 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10009 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 10010 MapperCGF.GetAddrOfLocalVar(&BaseArg), 10011 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10012 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 10013 MapperCGF.GetAddrOfLocalVar(&BeginArg), 10014 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10015 // Convert the size in bytes into the number of array elements. 10016 Size = MapperCGF.Builder.CreateExactUDiv( 10017 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10018 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 10019 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 10020 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size); 10021 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 10022 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 10023 C.getPointerType(Int64Ty), Loc); 10024 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 10025 MapperCGF.GetAddrOfLocalVar(&NameArg), 10026 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10027 10028 // Emit array initiation if this is an array section and \p MapType indicates 10029 // that memory allocation is required. 10030 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 10031 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 10032 MapName, ElementSize, HeadBB, /*IsInit=*/true); 10033 10034 // Emit a for loop to iterate through SizeArg of elements and map all of them. 10035 10036 // Emit the loop header block. 10037 MapperCGF.EmitBlock(HeadBB); 10038 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 10039 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 10040 // Evaluate whether the initial condition is satisfied. 
10041 llvm::Value *IsEmpty = 10042 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 10043 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 10044 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 10045 10046 // Emit the loop body block. 10047 MapperCGF.EmitBlock(BodyBB); 10048 llvm::BasicBlock *LastBB = BodyBB; 10049 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 10050 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 10051 PtrPHI->addIncoming(PtrBegin, EntryBB); 10052 Address PtrCurrent(PtrPHI, ElemTy, 10053 MapperCGF.GetAddrOfLocalVar(&BeginArg) 10054 .getAlignment() 10055 .alignmentOfArrayElement(ElementSize)); 10056 // Privatize the declared variable of mapper to be the current array element. 10057 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 10058 Scope.addPrivate(MapperVarDecl, PtrCurrent); 10059 (void)Scope.Privatize(); 10060 10061 // Get map clause information. Fill up the arrays with all mapped variables. 10062 MappableExprsHandler::MapCombinedInfoTy Info; 10063 MappableExprsHandler MEHandler(*D, MapperCGF); 10064 MEHandler.generateAllInfoForMapper(Info); 10065 10066 // Call the runtime API __tgt_mapper_num_components to get the number of 10067 // pre-existing components. 10068 llvm::Value *OffloadingArgs[] = {Handle}; 10069 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 10070 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10071 OMPRTL___tgt_mapper_num_components), 10072 OffloadingArgs); 10073 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 10074 PreviousSize, 10075 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 10076 10077 // Fill up the runtime mapper handle for all components. 
10078 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 10079 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 10080 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10081 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 10082 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10083 llvm::Value *CurSizeArg = Info.Sizes[I]; 10084 llvm::Value *CurNameArg = 10085 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 10086 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 10087 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 10088 10089 // Extract the MEMBER_OF field from the map type. 10090 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 10091 llvm::Value *MemberMapType = 10092 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 10093 10094 // Combine the map type inherited from user-defined mapper with that 10095 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 10096 // bits of the \a MapType, which is the input argument of the mapper 10097 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 10098 // bits of MemberMapType. 10099 // [OpenMP 5.0], 1.2.6. map-type decay. 
10100 // | alloc | to | from | tofrom | release | delete 10101 // ---------------------------------------------------------- 10102 // alloc | alloc | alloc | alloc | alloc | release | delete 10103 // to | alloc | to | alloc | to | release | delete 10104 // from | alloc | alloc | from | from | release | delete 10105 // tofrom | alloc | to | from | tofrom | release | delete 10106 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 10107 MapType, 10108 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 10109 MappableExprsHandler::OMP_MAP_FROM)); 10110 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 10111 llvm::BasicBlock *AllocElseBB = 10112 MapperCGF.createBasicBlock("omp.type.alloc.else"); 10113 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 10114 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 10115 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 10116 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 10117 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 10118 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 10119 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 10120 MapperCGF.EmitBlock(AllocBB); 10121 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 10122 MemberMapType, 10123 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10124 MappableExprsHandler::OMP_MAP_FROM))); 10125 MapperCGF.Builder.CreateBr(EndBB); 10126 MapperCGF.EmitBlock(AllocElseBB); 10127 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 10128 LeftToFrom, 10129 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 10130 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 10131 // In case of to, clear OMP_MAP_FROM. 
10132 MapperCGF.EmitBlock(ToBB); 10133 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 10134 MemberMapType, 10135 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 10136 MapperCGF.Builder.CreateBr(EndBB); 10137 MapperCGF.EmitBlock(ToElseBB); 10138 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 10139 LeftToFrom, 10140 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 10141 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 10142 // In case of from, clear OMP_MAP_TO. 10143 MapperCGF.EmitBlock(FromBB); 10144 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 10145 MemberMapType, 10146 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 10147 // In case of tofrom, do nothing. 10148 MapperCGF.EmitBlock(EndBB); 10149 LastBB = EndBB; 10150 llvm::PHINode *CurMapType = 10151 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 10152 CurMapType->addIncoming(AllocMapType, AllocBB); 10153 CurMapType->addIncoming(ToMapType, ToBB); 10154 CurMapType->addIncoming(FromMapType, FromBB); 10155 CurMapType->addIncoming(MemberMapType, ToElseBB); 10156 10157 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 10158 CurSizeArg, CurMapType, CurNameArg}; 10159 if (Info.Mappers[I]) { 10160 // Call the corresponding mapper function. 10161 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 10162 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 10163 assert(MapperFunc && "Expect a valid mapper function is available."); 10164 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 10165 } else { 10166 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10167 // data structure. 
10168 MapperCGF.EmitRuntimeCall( 10169 OMPBuilder.getOrCreateRuntimeFunction( 10170 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 10171 OffloadingArgs); 10172 } 10173 } 10174 10175 // Update the pointer to point to the next element that needs to be mapped, 10176 // and check whether we have mapped all elements. 10177 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 10178 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 10179 PtrPHI->addIncoming(PtrNext, LastBB); 10180 llvm::Value *IsDone = 10181 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 10182 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 10183 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 10184 10185 MapperCGF.EmitBlock(ExitBB); 10186 // Emit array deletion if this is an array section and \p MapType indicates 10187 // that deletion is required. 10188 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 10189 MapName, ElementSize, DoneBB, /*IsInit=*/false); 10190 10191 // Emit the function exit block. 10192 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 10193 MapperCGF.FinishFunction(); 10194 UDMMap.try_emplace(D, Fn); 10195 if (CGF) { 10196 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 10197 Decls.second.push_back(D); 10198 } 10199 } 10200 10201 /// Emit the array initialization or deletion portion for user-defined mapper 10202 /// code generation. First, it evaluates whether an array section is mapped and 10203 /// whether the \a MapType instructs to delete this section. If \a IsInit is 10204 /// true, and \a MapType indicates to not delete this array, array 10205 /// initialization code is generated. If \a IsInit is false, and \a MapType 10206 /// indicates to not this array, array deletion code is generated. 
/// Emit the array-section initialization (\p IsInit true) or deletion
/// (\p IsInit false) step of a user-defined mapper.
///
/// Emits a conditional branch from the current insertion point: to a newly
/// created body block when the computed condition holds, to \p ExitBB
/// otherwise. Inside the body block a call to __tgt_push_mapper_component is
/// emitted with the OMP_MAP_TO/OMP_MAP_FROM bits cleared and OMP_MAP_IMPLICIT
/// set. No terminator is emitted for the body block here; the insertion point
/// is left after the runtime call.
///
/// \param MapperCGF   Function being emitted.
/// \param Handle      Forwarded unchanged as the first runtime-call argument.
/// \param Base        Base pointer; compared against \p Begin when \p IsInit.
/// \param Begin       Begin pointer of the mapped data.
/// \param Size        Element count; "is array" means \p Size > 1 (signed).
/// \param MapType     Map-type bit mask (64-bit value).
/// \param MapName     Forwarded unchanged as the last runtime-call argument.
/// \param ElementSize Size of one element; multiplied by \p Size.
/// \param ExitBB      Block taken when the condition is false.
/// \param IsInit      Selects the init or the delete flavour.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init: (IsArray || (Base != Begin && PTR_AND_OBJ set)) && DELETE not set.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Delete: IsArray && DELETE set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the function recorded in UDMMap for the declare-mapper \p D.
///
/// On a cache miss emitUserDefinedMapper(D) is called first and UDMMap is
/// consulted again; the result of that second lookup is returned as is
/// (presumably emitUserDefinedMapper populates UDMMap -- confirm).
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount_mapper(loc, DeviceID, N) for
/// the loop directive associated with \p D, where N is the value produced by
/// \p SizeEmitter.
///
/// Nothing is emitted when no suitable directive is found or when
/// \p SizeEmitter returns null.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // D itself is used only when it is both a distribute and a teams directive.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of directive \p D.
///
/// The emitted code fills the offloading arrays from the captured variables
/// and map information, calls one of the __tgt_target*_mapper runtime entry
/// points and, if that call returns a non-null value, runs the host fallback.
/// The fallback calls \p OutlinedFn, or emits 'unreachable' when offloading is
/// mandatory. When \p OutlinedFnID is null only the fallback path is emitted;
/// when \p IfCond is present the choice between offload and fallback is made
/// through emitIfClause.
///
/// \param OutlinedFn   Host version of the region (may be null only when
///                     offloading is mandatory, see the assertion below).
/// \param OutlinedFnID Value passed to the runtime to identify the region.
/// \param IfCond       Expression of the 'if' clause, or null.
/// \param Device       Device expression together with its modifier.
/// \param SizeEmitter  Callback producing the loop trip count, forwarded to
///                     emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // 'depend' and 'nowait' clauses route codegen through
  // EmitOMPTargetTaskBasedDirective below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Regenerate the captured variables with the CodeGenFunction that is
        // active when the fallback is emitted.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured-record fields and the captured values
    // in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A capture of 'this' is recorded in the set as a null entry.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the generated arrays to ThenGen, which reads them through
    // InputInfo, MapTypesArray and MapNamesArray (all captured by reference).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for OpenMP target execution directives and emit the
/// device function for each one that has a target-region entry registered in
/// OffloadEntriesInfoManager under \p ParentName.
///
/// A null \p S is ignored. Once a target execution directive is handled its
/// contents are not scanned further. For any other executable directive the
/// raw statement is scanned; for a lambda expression its body is scanned;
/// everything else is scanned through its children.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Every kind below is treated as unreachable here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Return true when the declare-target device type of \p VD excludes it from
/// the current compilation: DT_NoHost while compiling for the host
/// (\p IsDevice false), or DT_Host while compiling for the device
/// (\p IsDevice true). Returns false when \p VD has no device type.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

/// Handle the declaration \p GD for target codegen.
///
/// On the host the result is simply isAssumedToBeNotEmitted() for function
/// declarations (false for anything else). On the device, function bodies are
/// first scanned for target regions; the result is then true if the function
/// is assumed not to be emitted, or if the declaration is neither a declare
/// target declaration nor present in AlreadyEmittedTargetDecls.
/// NOTE(review): per the in-body comments a true result appears to mean "do
/// not emit through the regular path" -- confirm against the caller.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Handle the global variable \p GD for target codegen.
///
/// Returns true when the variable is assumed not to be emitted for the
/// current side, or (device only) when it is not declare target, is mapped
/// with 'link', or is mapped with 'to' under unified shared memory; in those
/// device cases the variable is also recorded in DeferredGlobalVariables.
/// On the device the constructors and destructor of the variable's class type
/// (if any) are scanned for target regions first.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Register the global variable \p VD (with address \p Addr) as a device
/// global-variable offload entry.
///
/// Nothing is registered when there are no target triples on the host, or
/// when \p VD has a device type other than DT_Any. Variables that are not
/// declare target are only remembered in EmittedNonTargetVariables (device
/// compilation only). For 'to' without unified shared memory the variable
/// itself is registered; for 'link', or 'to' with unified shared memory, the
/// entry is pointer-sized with weak linkage and named after the address
/// obtained from getAddrOfDeclareTargetVar (host) or \p Addr (device).
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    // A declaration-only variable is registered with size zero.
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is registered by name only, with no address.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatch \p GD to emitTargetFunctions() when it is a function or a
/// declare-reduction declaration, and to emitTargetGlobalVariable() otherwise;
/// the callee's result is returned unchanged.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Process every variable recorded in DeferredGlobalVariables.
///
/// Variables that are not declare target are skipped. 'to' variables without
/// unified shared memory are emitted through CGM.EmitGlobal(); for 'link', or
/// 'to' with unified shared memory, getAddrOfDeclareTargetVar() is called and
/// its result discarded.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// This implementation only asserts that \p D is a target execution directive;
/// it emits no code.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effects of the clauses of the 'requires' declaration \p D:
/// unified_shared_memory sets HasRequiresUnifiedSharedMemory, and
/// atomic_default_mem_order sets RequiresAtomicOrdering (acq_rel, seq_cst or
/// relaxed; an unknown kind leaves it unchanged). Other clauses are ignored.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

/// Return the atomic ordering currently stored in RequiresAtomicOrdering
/// (written by processRequiresDirective()).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// Determine the address space for a global variable carrying an OpenMP
/// allocate attribute.
///
/// Returns false, leaving \p AS untouched, when \p VD is null or has no
/// OMPAllocateDeclAttr. For every predefined allocator \p AS is set to
/// LangAS::Default and true is returned. A user-defined allocator is treated
/// as unreachable.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// Return the HasRequiresUnifiedSharedMemory flag (set by
/// processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// When compiling for the device, save the runtime's ShouldMarkAsGlobal flag
/// and clear it; the destructor restores the saved value. Does nothing on the
/// host.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// Restore ShouldMarkAsGlobal to the value saved by the constructor (device
/// compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Record the function \p GD in AlreadyEmittedTargetDecls when applicable.
///
/// Returns true immediately on the host or when ShouldMarkAsGlobal is false.
/// For a declare target function that has a body and is not yet in
/// AlreadyEmittedTargetDecls, the result is whether an llvm::Function with
/// the mangled name already exists as a definition (false when none is
/// found); other declare target functions yield true. For any other function
/// the declaration is inserted into AlreadyEmittedTargetDecls and the result
/// is true only if it was already present.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the function that calls __tgt_register_requires with the flags
/// derived from the 'requires' directives seen so far.
///
/// \return The created function, or null when there are no target triples,
/// in simd-only mode, when compiling for the device, or when no offload entry
/// and no (declare) target region has been emitted.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit a call to __kmpc_fork_teams(loc, n, microtask, var1, .., varn) for the
/// outlined teams function \p OutlinedFn, passing \p CapturedVars as the
/// trailing arguments. Does nothing when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit a call to __kmpc_push_num_teams(&loc, global_tid, num_teams,
/// thread_limit).
///
/// \p NumTeams and \p ThreadLimit are each evaluated and cast to a signed
/// 32-bit integer; a null expression is passed to the runtime as the 32-bit
/// constant zero. Does nothing when \p CGF has no insertion point.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
11192 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11193 /*IsNonContiguous=*/true); 11194 11195 llvm::Value *BasePointersArrayArg = nullptr; 11196 llvm::Value *PointersArrayArg = nullptr; 11197 llvm::Value *SizesArrayArg = nullptr; 11198 llvm::Value *MapTypesArrayArg = nullptr; 11199 llvm::Value *MapNamesArrayArg = nullptr; 11200 llvm::Value *MappersArrayArg = nullptr; 11201 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11202 SizesArrayArg, MapTypesArrayArg, 11203 MapNamesArrayArg, MappersArrayArg, Info); 11204 11205 // Emit device ID if any. 11206 llvm::Value *DeviceID = nullptr; 11207 if (Device) { 11208 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11209 CGF.Int64Ty, /*isSigned=*/true); 11210 } else { 11211 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11212 } 11213 11214 // Emit the number of elements in the offloading arrays. 11215 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11216 // 11217 // Source location for the ident struct 11218 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11219 11220 llvm::Value *OffloadingArgs[] = {RTLoc, 11221 DeviceID, 11222 PointerNum, 11223 BasePointersArrayArg, 11224 PointersArrayArg, 11225 SizesArrayArg, 11226 MapTypesArrayArg, 11227 MapNamesArrayArg, 11228 MappersArrayArg}; 11229 CGF.EmitRuntimeCall( 11230 OMPBuilder.getOrCreateRuntimeFunction( 11231 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11232 OffloadingArgs); 11233 11234 // If device pointer privatization is required, emit the body of the region 11235 // here. It will have to be duplicated: with and without privatization. 11236 if (!Info.CaptureDeviceAddrMap.empty()) 11237 CodeGen(CGF); 11238 }; 11239 11240 // Generate code for the closing of the data region. 
11241 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11242 PrePostActionTy &) { 11243 assert(Info.isValid() && "Invalid data environment closing arguments."); 11244 11245 llvm::Value *BasePointersArrayArg = nullptr; 11246 llvm::Value *PointersArrayArg = nullptr; 11247 llvm::Value *SizesArrayArg = nullptr; 11248 llvm::Value *MapTypesArrayArg = nullptr; 11249 llvm::Value *MapNamesArrayArg = nullptr; 11250 llvm::Value *MappersArrayArg = nullptr; 11251 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11252 SizesArrayArg, MapTypesArrayArg, 11253 MapNamesArrayArg, MappersArrayArg, Info, 11254 {/*ForEndCall=*/true}); 11255 11256 // Emit device ID if any. 11257 llvm::Value *DeviceID = nullptr; 11258 if (Device) { 11259 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11260 CGF.Int64Ty, /*isSigned=*/true); 11261 } else { 11262 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11263 } 11264 11265 // Emit the number of elements in the offloading arrays. 11266 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11267 11268 // Source location for the ident struct 11269 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11270 11271 llvm::Value *OffloadingArgs[] = {RTLoc, 11272 DeviceID, 11273 PointerNum, 11274 BasePointersArrayArg, 11275 PointersArrayArg, 11276 SizesArrayArg, 11277 MapTypesArrayArg, 11278 MapNamesArrayArg, 11279 MappersArrayArg}; 11280 CGF.EmitRuntimeCall( 11281 OMPBuilder.getOrCreateRuntimeFunction( 11282 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11283 OffloadingArgs); 11284 }; 11285 11286 // If we need device pointer privatization, we need to emit the body of the 11287 // region with no privatization in the 'else' branch of the conditional. 11288 // Otherwise, we don't have to do anything. 
11289 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11290 PrePostActionTy &) { 11291 if (!Info.CaptureDeviceAddrMap.empty()) { 11292 CodeGen.setAction(NoPrivAction); 11293 CodeGen(CGF); 11294 } 11295 }; 11296 11297 // We don't have to do anything to close the region if the if clause evaluates 11298 // to false. 11299 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11300 11301 if (IfCond) { 11302 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11303 } else { 11304 RegionCodeGenTy RCG(BeginThenGen); 11305 RCG(CGF); 11306 } 11307 11308 // If we don't require privatization of device pointers, we emit the body in 11309 // between the runtime calls. This avoids duplicating the body code. 11310 if (Info.CaptureDeviceAddrMap.empty()) { 11311 CodeGen.setAction(NoPrivAction); 11312 CodeGen(CGF); 11313 } 11314 11315 if (IfCond) { 11316 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11317 } else { 11318 RegionCodeGenTy RCG(EndThenGen); 11319 RCG(CGF); 11320 } 11321 } 11322 11323 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11324 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11325 const Expr *Device) { 11326 if (!CGF.HaveInsertPoint()) 11327 return; 11328 11329 assert((isa<OMPTargetEnterDataDirective>(D) || 11330 isa<OMPTargetExitDataDirective>(D) || 11331 isa<OMPTargetUpdateDirective>(D)) && 11332 "Expecting either target enter, exit data, or update directives."); 11333 11334 CodeGenFunction::OMPTargetDataInfo InputInfo; 11335 llvm::Value *MapTypesArray = nullptr; 11336 llvm::Value *MapNamesArray = nullptr; 11337 // Generate the code for the opening of the data environment. 11338 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11339 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11340 // Emit device ID if any. 
11341 llvm::Value *DeviceID = nullptr; 11342 if (Device) { 11343 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11344 CGF.Int64Ty, /*isSigned=*/true); 11345 } else { 11346 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11347 } 11348 11349 // Emit the number of elements in the offloading arrays. 11350 llvm::Constant *PointerNum = 11351 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11352 11353 // Source location for the ident struct 11354 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11355 11356 llvm::Value *OffloadingArgs[] = {RTLoc, 11357 DeviceID, 11358 PointerNum, 11359 InputInfo.BasePointersArray.getPointer(), 11360 InputInfo.PointersArray.getPointer(), 11361 InputInfo.SizesArray.getPointer(), 11362 MapTypesArray, 11363 MapNamesArray, 11364 InputInfo.MappersArray.getPointer()}; 11365 11366 // Select the right runtime function call for each standalone 11367 // directive. 11368 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11369 RuntimeFunction RTLFn; 11370 switch (D.getDirectiveKind()) { 11371 case OMPD_target_enter_data: 11372 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11373 : OMPRTL___tgt_target_data_begin_mapper; 11374 break; 11375 case OMPD_target_exit_data: 11376 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11377 : OMPRTL___tgt_target_data_end_mapper; 11378 break; 11379 case OMPD_target_update: 11380 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11381 : OMPRTL___tgt_target_data_update_mapper; 11382 break; 11383 case OMPD_parallel: 11384 case OMPD_for: 11385 case OMPD_parallel_for: 11386 case OMPD_parallel_master: 11387 case OMPD_parallel_sections: 11388 case OMPD_for_simd: 11389 case OMPD_parallel_for_simd: 11390 case OMPD_cancel: 11391 case OMPD_cancellation_point: 11392 case OMPD_ordered: 11393 case OMPD_threadprivate: 11394 case OMPD_allocate: 11395 case OMPD_task: 11396 case OMPD_simd: 11397 case OMPD_tile: 11398 case OMPD_unroll: 11399 case OMPD_sections: 11400 case OMPD_section: 11401 case OMPD_single: 11402 case OMPD_master: 11403 case OMPD_critical: 11404 case OMPD_taskyield: 11405 case OMPD_barrier: 11406 case OMPD_taskwait: 11407 case OMPD_taskgroup: 11408 case OMPD_atomic: 11409 case OMPD_flush: 11410 case OMPD_depobj: 11411 case OMPD_scan: 11412 case OMPD_teams: 11413 case OMPD_target_data: 11414 case OMPD_distribute: 11415 case OMPD_distribute_simd: 11416 case OMPD_distribute_parallel_for: 11417 case OMPD_distribute_parallel_for_simd: 11418 case OMPD_teams_distribute: 11419 case OMPD_teams_distribute_simd: 11420 case OMPD_teams_distribute_parallel_for: 11421 case OMPD_teams_distribute_parallel_for_simd: 11422 case OMPD_declare_simd: 11423 case OMPD_declare_variant: 11424 case OMPD_begin_declare_variant: 11425 case OMPD_end_declare_variant: 11426 case OMPD_declare_target: 11427 case OMPD_end_declare_target: 11428 case OMPD_declare_reduction: 11429 case OMPD_declare_mapper: 11430 case OMPD_taskloop: 11431 case OMPD_taskloop_simd: 11432 case OMPD_master_taskloop: 11433 case OMPD_master_taskloop_simd: 11434 case OMPD_parallel_master_taskloop: 11435 case OMPD_parallel_master_taskloop_simd: 11436 case OMPD_target: 11437 case OMPD_target_simd: 11438 case OMPD_target_teams_distribute: 11439 case OMPD_target_teams_distribute_simd: 11440 case OMPD_target_teams_distribute_parallel_for: 11441 case OMPD_target_teams_distribute_parallel_for_simd: 11442 case 
OMPD_target_teams: 11443 case OMPD_target_parallel: 11444 case OMPD_target_parallel_for: 11445 case OMPD_target_parallel_for_simd: 11446 case OMPD_requires: 11447 case OMPD_metadirective: 11448 case OMPD_unknown: 11449 default: 11450 llvm_unreachable("Unexpected standalone target data directive."); 11451 break; 11452 } 11453 CGF.EmitRuntimeCall( 11454 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11455 OffloadingArgs); 11456 }; 11457 11458 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11459 &MapNamesArray](CodeGenFunction &CGF, 11460 PrePostActionTy &) { 11461 // Fill up the arrays with all the mapped variables. 11462 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11463 11464 // Get map clause information. 11465 MappableExprsHandler MEHandler(D, CGF); 11466 MEHandler.generateAllInfo(CombinedInfo); 11467 11468 TargetDataInfo Info; 11469 // Fill up the arrays and create the arguments. 11470 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11471 /*IsNonContiguous=*/true); 11472 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11473 D.hasClausesOfKind<OMPNowaitClause>(); 11474 emitOffloadingArraysArgument( 11475 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11476 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11477 {/*ForEndCall=*/false}); 11478 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11479 InputInfo.BasePointersArray = 11480 Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 11481 InputInfo.PointersArray = 11482 Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 11483 InputInfo.SizesArray = 11484 Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); 11485 InputInfo.MappersArray = 11486 Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 11487 MapTypesArray = Info.MapTypesArray; 11488 MapNamesArray = Info.MapNamesArray; 11489 if (RequiresOuterTask) 11490 
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11491 else 11492 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11493 }; 11494 11495 if (IfCond) { 11496 emitIfClause(CGF, IfCond, TargetThenGen, 11497 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11498 } else { 11499 RegionCodeGenTy ThenRCG(TargetThenGen); 11500 ThenRCG(CGF); 11501 } 11502 } 11503 11504 namespace { 11505 /// Kind of parameter in a function with 'declare simd' directive. 11506 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11507 /// Attribute set of the parameter. 11508 struct ParamAttrTy { 11509 ParamKindTy Kind = Vector; 11510 llvm::APSInt StrideOrArg; 11511 llvm::APSInt Alignment; 11512 }; 11513 } // namespace 11514 11515 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11516 ArrayRef<ParamAttrTy> ParamAttrs) { 11517 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11518 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11519 // of that clause. The VLEN value must be power of 2. 11520 // In other case the notion of the function`s "characteristic data type" (CDT) 11521 // is used to compute the vector length. 11522 // CDT is defined in the following order: 11523 // a) For non-void function, the CDT is the return type. 11524 // b) If the function has any non-uniform, non-linear parameters, then the 11525 // CDT is the type of the first such parameter. 11526 // c) If the CDT determined by a) or b) above is struct, union, or class 11527 // type which is pass-by-value (except for the type that maps to the 11528 // built-in complex data type), the characteristic data type is int. 11529 // d) If none of the above three cases is applicable, the CDT is int. 11530 // The VLEN is then determined based on the CDT and the size of vector 11531 // register of that ISA for which current vector version is generated. 
The 11532 // VLEN is computed using the formula below: 11533 // VLEN = sizeof(vector_register) / sizeof(CDT), 11534 // where vector register size specified in section 3.2.1 Registers and the 11535 // Stack Frame of original AMD64 ABI document. 11536 QualType RetType = FD->getReturnType(); 11537 if (RetType.isNull()) 11538 return 0; 11539 ASTContext &C = FD->getASTContext(); 11540 QualType CDT; 11541 if (!RetType.isNull() && !RetType->isVoidType()) { 11542 CDT = RetType; 11543 } else { 11544 unsigned Offset = 0; 11545 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11546 if (ParamAttrs[Offset].Kind == Vector) 11547 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11548 ++Offset; 11549 } 11550 if (CDT.isNull()) { 11551 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11552 if (ParamAttrs[I + Offset].Kind == Vector) { 11553 CDT = FD->getParamDecl(I)->getType(); 11554 break; 11555 } 11556 } 11557 } 11558 } 11559 if (CDT.isNull()) 11560 CDT = C.IntTy; 11561 CDT = CDT->getCanonicalTypeUnqualified(); 11562 if (CDT->isRecordType() || CDT->isUnionType()) 11563 CDT = C.IntTy; 11564 return C.getTypeSize(CDT); 11565 } 11566 11567 static void 11568 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11569 const llvm::APSInt &VLENVal, 11570 ArrayRef<ParamAttrTy> ParamAttrs, 11571 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11572 struct ISADataTy { 11573 char ISA; 11574 unsigned VecRegSize; 11575 }; 11576 ISADataTy ISAData[] = { 11577 { 11578 'b', 128 11579 }, // SSE 11580 { 11581 'c', 256 11582 }, // AVX 11583 { 11584 'd', 256 11585 }, // AVX2 11586 { 11587 'e', 512 11588 }, // AVX512 11589 }; 11590 llvm::SmallVector<char, 2> Masked; 11591 switch (State) { 11592 case OMPDeclareSimdDeclAttr::BS_Undefined: 11593 Masked.push_back('N'); 11594 Masked.push_back('M'); 11595 break; 11596 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11597 Masked.push_back('N'); 11598 break; 11599 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11600 
Masked.push_back('M'); 11601 break; 11602 } 11603 for (char Mask : Masked) { 11604 for (const ISADataTy &Data : ISAData) { 11605 SmallString<256> Buffer; 11606 llvm::raw_svector_ostream Out(Buffer); 11607 Out << "_ZGV" << Data.ISA << Mask; 11608 if (!VLENVal) { 11609 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11610 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11611 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11612 } else { 11613 Out << VLENVal; 11614 } 11615 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11616 switch (ParamAttr.Kind){ 11617 case LinearWithVarStride: 11618 Out << 's' << ParamAttr.StrideOrArg; 11619 break; 11620 case Linear: 11621 Out << 'l'; 11622 if (ParamAttr.StrideOrArg != 1) 11623 Out << ParamAttr.StrideOrArg; 11624 break; 11625 case Uniform: 11626 Out << 'u'; 11627 break; 11628 case Vector: 11629 Out << 'v'; 11630 break; 11631 } 11632 if (!!ParamAttr.Alignment) 11633 Out << 'a' << ParamAttr.Alignment; 11634 } 11635 Out << '_' << Fn->getName(); 11636 Fn->addFnAttr(Out.str()); 11637 } 11638 } 11639 } 11640 11641 // This are the Functions that are needed to mangle the name of the 11642 // vector functions generated by the compiler, according to the rules 11643 // defined in the "Vector Function ABI specifications for AArch64", 11644 // available at 11645 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11646 11647 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11648 /// 11649 /// TODO: Need to implement the behavior for reference marked with a 11650 /// var or no linear modifiers (1.b in the section). For this, we 11651 /// need to extend ParamKindTy to support the linear modifiers. 
11652 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11653 QT = QT.getCanonicalType(); 11654 11655 if (QT->isVoidType()) 11656 return false; 11657 11658 if (Kind == ParamKindTy::Uniform) 11659 return false; 11660 11661 if (Kind == ParamKindTy::Linear) 11662 return false; 11663 11664 // TODO: Handle linear references with modifiers 11665 11666 if (Kind == ParamKindTy::LinearWithVarStride) 11667 return false; 11668 11669 return true; 11670 } 11671 11672 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11673 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11674 QT = QT.getCanonicalType(); 11675 unsigned Size = C.getTypeSize(QT); 11676 11677 // Only scalars and complex within 16 bytes wide set PVB to true. 11678 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11679 return false; 11680 11681 if (QT->isFloatingType()) 11682 return true; 11683 11684 if (QT->isIntegerType()) 11685 return true; 11686 11687 if (QT->isPointerType()) 11688 return true; 11689 11690 // TODO: Add support for complex types (section 3.1.2, item 2). 11691 11692 return false; 11693 } 11694 11695 /// Computes the lane size (LS) of a return type or of an input parameter, 11696 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11697 /// TODO: Add support for references, section 3.2.1, item 1. 11698 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11699 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11700 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11701 if (getAArch64PBV(PTy, C)) 11702 return C.getTypeSize(PTy); 11703 } 11704 if (getAArch64PBV(QT, C)) 11705 return C.getTypeSize(QT); 11706 11707 return C.getTypeSize(C.getUIntPtrType()); 11708 } 11709 11710 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11711 // signature of the scalar function, as defined in 3.2.2 of the 11712 // AAVFABI. 
11713 static std::tuple<unsigned, unsigned, bool> 11714 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11715 QualType RetType = FD->getReturnType().getCanonicalType(); 11716 11717 ASTContext &C = FD->getASTContext(); 11718 11719 bool OutputBecomesInput = false; 11720 11721 llvm::SmallVector<unsigned, 8> Sizes; 11722 if (!RetType->isVoidType()) { 11723 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11724 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11725 OutputBecomesInput = true; 11726 } 11727 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11728 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11729 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11730 } 11731 11732 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11733 // The LS of a function parameter / return value can only be a power 11734 // of 2, starting from 8 bits, up to 128. 11735 assert(llvm::all_of(Sizes, 11736 [](unsigned Size) { 11737 return Size == 8 || Size == 16 || Size == 32 || 11738 Size == 64 || Size == 128; 11739 }) && 11740 "Invalid size"); 11741 11742 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11743 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11744 OutputBecomesInput); 11745 } 11746 11747 /// Mangle the parameter part of the vector function name according to 11748 /// their OpenMP classification. The mangling function is defined in 11749 /// section 3.5 of the AAVFABI. 11750 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11751 SmallString<256> Buffer; 11752 llvm::raw_svector_ostream Out(Buffer); 11753 for (const auto &ParamAttr : ParamAttrs) { 11754 switch (ParamAttr.Kind) { 11755 case LinearWithVarStride: 11756 Out << "ls" << ParamAttr.StrideOrArg; 11757 break; 11758 case Linear: 11759 Out << 'l'; 11760 // Don't print the step value if it is not present or if it is 11761 // equal to 1. 
11762 if (ParamAttr.StrideOrArg != 1) 11763 Out << ParamAttr.StrideOrArg; 11764 break; 11765 case Uniform: 11766 Out << 'u'; 11767 break; 11768 case Vector: 11769 Out << 'v'; 11770 break; 11771 } 11772 11773 if (!!ParamAttr.Alignment) 11774 Out << 'a' << ParamAttr.Alignment; 11775 } 11776 11777 return std::string(Out.str()); 11778 } 11779 11780 // Function used to add the attribute. The parameter `VLEN` is 11781 // templated to allow the use of "x" when targeting scalable functions 11782 // for SVE. 11783 template <typename T> 11784 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11785 char ISA, StringRef ParSeq, 11786 StringRef MangledName, bool OutputBecomesInput, 11787 llvm::Function *Fn) { 11788 SmallString<256> Buffer; 11789 llvm::raw_svector_ostream Out(Buffer); 11790 Out << Prefix << ISA << LMask << VLEN; 11791 if (OutputBecomesInput) 11792 Out << "v"; 11793 Out << ParSeq << "_" << MangledName; 11794 Fn->addFnAttr(Out.str()); 11795 } 11796 11797 // Helper function to generate the Advanced SIMD names depending on 11798 // the value of the NDS when simdlen is not present. 
11799 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11800 StringRef Prefix, char ISA, 11801 StringRef ParSeq, StringRef MangledName, 11802 bool OutputBecomesInput, 11803 llvm::Function *Fn) { 11804 switch (NDS) { 11805 case 8: 11806 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11809 OutputBecomesInput, Fn); 11810 break; 11811 case 16: 11812 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11813 OutputBecomesInput, Fn); 11814 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11815 OutputBecomesInput, Fn); 11816 break; 11817 case 32: 11818 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11819 OutputBecomesInput, Fn); 11820 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11821 OutputBecomesInput, Fn); 11822 break; 11823 case 64: 11824 case 128: 11825 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11826 OutputBecomesInput, Fn); 11827 break; 11828 default: 11829 llvm_unreachable("Scalar type is too wide."); 11830 } 11831 } 11832 11833 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11834 static void emitAArch64DeclareSimdFunction( 11835 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11836 ArrayRef<ParamAttrTy> ParamAttrs, 11837 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11838 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11839 11840 // Get basic data for building the vector signature. 11841 const auto Data = getNDSWDS(FD, ParamAttrs); 11842 const unsigned NDS = std::get<0>(Data); 11843 const unsigned WDS = std::get<1>(Data); 11844 const bool OutputBecomesInput = std::get<2>(Data); 11845 11846 // Check the values provided via `simdlen` by the user. 11847 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11848 if (UserVLEN == 1) { 11849 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11850 DiagnosticsEngine::Warning, 11851 "The clause simdlen(1) has no effect when targeting aarch64."); 11852 CGM.getDiags().Report(SLoc, DiagID); 11853 return; 11854 } 11855 11856 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11857 // Advanced SIMD output. 11858 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11859 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11860 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11861 "power of 2 when targeting Advanced SIMD."); 11862 CGM.getDiags().Report(SLoc, DiagID); 11863 return; 11864 } 11865 11866 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11867 // limits. 11868 if (ISA == 's' && UserVLEN != 0) { 11869 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11870 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11871 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11872 "lanes in the architectural constraints " 11873 "for SVE (min is 128-bit, max is " 11874 "2048-bit, by steps of 128-bit)"); 11875 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11876 return; 11877 } 11878 } 11879 11880 // Sort out parameter sequence. 11881 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11882 StringRef Prefix = "_ZGV"; 11883 // Generate simdlen from user input (if any). 11884 if (UserVLEN) { 11885 if (ISA == 's') { 11886 // SVE generates only a masked function. 11887 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11888 OutputBecomesInput, Fn); 11889 } else { 11890 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11891 // Advanced SIMD generates one or two functions, depending on 11892 // the `[not]inbranch` clause. 
11893 switch (State) { 11894 case OMPDeclareSimdDeclAttr::BS_Undefined: 11895 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11896 OutputBecomesInput, Fn); 11897 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11898 OutputBecomesInput, Fn); 11899 break; 11900 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11901 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11902 OutputBecomesInput, Fn); 11903 break; 11904 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11905 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11906 OutputBecomesInput, Fn); 11907 break; 11908 } 11909 } 11910 } else { 11911 // If no user simdlen is provided, follow the AAVFABI rules for 11912 // generating the vector length. 11913 if (ISA == 's') { 11914 // SVE, section 3.4.1, item 1. 11915 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11916 OutputBecomesInput, Fn); 11917 } else { 11918 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11919 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11920 // two vector names depending on the use of the clause 11921 // `[not]inbranch`. 
11922 switch (State) { 11923 case OMPDeclareSimdDeclAttr::BS_Undefined: 11924 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11925 OutputBecomesInput, Fn); 11926 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11927 OutputBecomesInput, Fn); 11928 break; 11929 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11930 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11931 OutputBecomesInput, Fn); 11932 break; 11933 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11934 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11935 OutputBecomesInput, Fn); 11936 break; 11937 } 11938 } 11939 } 11940 } 11941 11942 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11943 llvm::Function *Fn) { 11944 ASTContext &C = CGM.getContext(); 11945 FD = FD->getMostRecentDecl(); 11946 while (FD) { 11947 // Map params to their positions in function decl. 11948 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11949 if (isa<CXXMethodDecl>(FD)) 11950 ParamPositions.try_emplace(FD, 0); 11951 unsigned ParamPos = ParamPositions.size(); 11952 for (const ParmVarDecl *P : FD->parameters()) { 11953 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11954 ++ParamPos; 11955 } 11956 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11957 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11958 // Mark uniform parameters. 11959 for (const Expr *E : Attr->uniforms()) { 11960 E = E->IgnoreParenImpCasts(); 11961 unsigned Pos; 11962 if (isa<CXXThisExpr>(E)) { 11963 Pos = ParamPositions[FD]; 11964 } else { 11965 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11966 ->getCanonicalDecl(); 11967 auto It = ParamPositions.find(PVD); 11968 assert(It != ParamPositions.end() && "Function parameter not found"); 11969 Pos = It->second; 11970 } 11971 ParamAttrs[Pos].Kind = Uniform; 11972 } 11973 // Get alignment info. 
11974 auto *NI = Attr->alignments_begin(); 11975 for (const Expr *E : Attr->aligneds()) { 11976 E = E->IgnoreParenImpCasts(); 11977 unsigned Pos; 11978 QualType ParmTy; 11979 if (isa<CXXThisExpr>(E)) { 11980 Pos = ParamPositions[FD]; 11981 ParmTy = E->getType(); 11982 } else { 11983 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11984 ->getCanonicalDecl(); 11985 auto It = ParamPositions.find(PVD); 11986 assert(It != ParamPositions.end() && "Function parameter not found"); 11987 Pos = It->second; 11988 ParmTy = PVD->getType(); 11989 } 11990 ParamAttrs[Pos].Alignment = 11991 (*NI) 11992 ? (*NI)->EvaluateKnownConstInt(C) 11993 : llvm::APSInt::getUnsigned( 11994 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11995 .getQuantity()); 11996 ++NI; 11997 } 11998 // Mark linear parameters. 11999 auto *SI = Attr->steps_begin(); 12000 for (const Expr *E : Attr->linears()) { 12001 E = E->IgnoreParenImpCasts(); 12002 unsigned Pos; 12003 // Rescaling factor needed to compute the linear parameter 12004 // value in the mangled name. 12005 unsigned PtrRescalingFactor = 1; 12006 if (isa<CXXThisExpr>(E)) { 12007 Pos = ParamPositions[FD]; 12008 } else { 12009 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12010 ->getCanonicalDecl(); 12011 auto It = ParamPositions.find(PVD); 12012 assert(It != ParamPositions.end() && "Function parameter not found"); 12013 Pos = It->second; 12014 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12015 PtrRescalingFactor = CGM.getContext() 12016 .getTypeSizeInChars(P->getPointeeType()) 12017 .getQuantity(); 12018 } 12019 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12020 ParamAttr.Kind = Linear; 12021 // Assuming a stride of 1, for `linear` without modifiers. 
12022 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12023 if (*SI) { 12024 Expr::EvalResult Result; 12025 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12026 if (const auto *DRE = 12027 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12028 if (const auto *StridePVD = 12029 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 12030 ParamAttr.Kind = LinearWithVarStride; 12031 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 12032 assert(It != ParamPositions.end() && 12033 "Function parameter not found"); 12034 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 12035 } 12036 } 12037 } else { 12038 ParamAttr.StrideOrArg = Result.Val.getInt(); 12039 } 12040 } 12041 // If we are using a linear clause on a pointer, we need to 12042 // rescale the value of linear_step with the byte size of the 12043 // pointee type. 12044 if (Linear == ParamAttr.Kind) 12045 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12046 ++SI; 12047 } 12048 llvm::APSInt VLENVal; 12049 SourceLocation ExprLoc; 12050 const Expr *VLENExpr = Attr->getSimdlen(); 12051 if (VLENExpr) { 12052 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12053 ExprLoc = VLENExpr->getExprLoc(); 12054 } 12055 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12056 if (CGM.getTriple().isX86()) { 12057 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12058 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12059 unsigned VLEN = VLENVal.getExtValue(); 12060 StringRef MangledName = Fn->getName(); 12061 if (CGM.getTarget().hasFeature("sve")) 12062 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12063 MangledName, 's', 128, Fn, ExprLoc); 12064 if (CGM.getTarget().hasFeature("neon")) 12065 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12066 MangledName, 'n', 128, Fn, ExprLoc); 12067 } 12068 } 12069 FD = FD->getPreviousDecl(); 12070 } 12071 } 12072 12073 namespace { 12074 /// Cleanup 
action for doacross support. 12075 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12076 public: 12077 static const int DoacrossFinArgs = 2; 12078 12079 private: 12080 llvm::FunctionCallee RTLFn; 12081 llvm::Value *Args[DoacrossFinArgs]; 12082 12083 public: 12084 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12085 ArrayRef<llvm::Value *> CallArgs) 12086 : RTLFn(RTLFn) { 12087 assert(CallArgs.size() == DoacrossFinArgs); 12088 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12089 } 12090 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12091 if (!CGF.HaveInsertPoint()) 12092 return; 12093 CGF.EmitRuntimeCall(RTLFn, Args); 12094 } 12095 }; 12096 } // namespace 12097 12098 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12099 const OMPLoopDirective &D, 12100 ArrayRef<Expr *> NumIterations) { 12101 if (!CGF.HaveInsertPoint()) 12102 return; 12103 12104 ASTContext &C = CGM.getContext(); 12105 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12106 RecordDecl *RD; 12107 if (KmpDimTy.isNull()) { 12108 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12109 // kmp_int64 lo; // lower 12110 // kmp_int64 up; // upper 12111 // kmp_int64 st; // stride 12112 // }; 12113 RD = C.buildImplicitRecord("kmp_dim"); 12114 RD->startDefinition(); 12115 addFieldToRecordDecl(C, RD, Int64Ty); 12116 addFieldToRecordDecl(C, RD, Int64Ty); 12117 addFieldToRecordDecl(C, RD, Int64Ty); 12118 RD->completeDefinition(); 12119 KmpDimTy = C.getRecordType(RD); 12120 } else { 12121 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12122 } 12123 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12124 QualType ArrayTy = 12125 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12126 12127 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12128 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12129 enum { LowerFD = 0, UpperFD, StrideFD }; 12130 // Fill dims with data. 
12131 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12132 LValue DimsLVal = CGF.MakeAddrLValue( 12133 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12134 // dims.upper = num_iterations; 12135 LValue UpperLVal = CGF.EmitLValueForField( 12136 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12137 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12138 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12139 Int64Ty, NumIterations[I]->getExprLoc()); 12140 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12141 // dims.stride = 1; 12142 LValue StrideLVal = CGF.EmitLValueForField( 12143 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12144 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12145 StrideLVal); 12146 } 12147 12148 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12149 // kmp_int32 num_dims, struct kmp_dim * dims); 12150 llvm::Value *Args[] = { 12151 emitUpdateLocation(CGF, D.getBeginLoc()), 12152 getThreadID(CGF, D.getBeginLoc()), 12153 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12154 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12155 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12156 CGM.VoidPtrTy)}; 12157 12158 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12159 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12160 CGF.EmitRuntimeCall(RTLFn, Args); 12161 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12162 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12163 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12164 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12165 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12166 llvm::makeArrayRef(FiniArgs)); 12167 } 12168 12169 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12170 const OMPDependClause *C) { 12171 QualType Int64Ty = 12172 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12173 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12174 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12175 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12176 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12177 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12178 const Expr *CounterVal = C->getLoopData(I); 12179 assert(CounterVal); 12180 llvm::Value *CntVal = CGF.EmitScalarConversion( 12181 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12182 CounterVal->getExprLoc()); 12183 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12184 /*Volatile=*/false, Int64Ty); 12185 } 12186 llvm::Value *Args[] = { 12187 emitUpdateLocation(CGF, C->getBeginLoc()), 12188 getThreadID(CGF, C->getBeginLoc()), 12189 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12190 llvm::FunctionCallee RTLFn; 12191 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12192 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12193 OMPRTL___kmpc_doacross_post); 12194 } else { 12195 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12196 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12197 OMPRTL___kmpc_doacross_wait); 12198 } 12199 CGF.EmitRuntimeCall(RTLFn, Args); 12200 } 12201 12202 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12203 llvm::FunctionCallee Callee, 12204 ArrayRef<llvm::Value *> Args) const { 12205 assert(Loc.isValid() && "Outlined function call location must be valid."); 12206 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12207 12208 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12209 if (Fn->doesNotThrow()) { 12210 CGF.EmitNounwindRuntimeCall(Fn, Args); 12211 return; 12212 } 12213 } 12214 CGF.EmitRuntimeCall(Callee, Args); 12215 } 12216 12217 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12218 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 12219 ArrayRef<llvm::Value *> Args) const { 12220 emitCall(CGF, Loc, OutlinedFn, Args); 12221 } 12222 12223 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12224 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12225 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12226 HasEmittedDeclareTargetRegion = true; 12227 } 12228 12229 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12230 const VarDecl *NativeParam, 12231 const VarDecl *TargetParam) const { 12232 return CGF.GetAddrOfLocalVar(NativeParam); 12233 } 12234 12235 /// Return allocator value from expression, or return a null allocator (default 12236 /// when no allocator specified). 12237 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12238 const Expr *Allocator) { 12239 llvm::Value *AllocVal; 12240 if (Allocator) { 12241 AllocVal = CGF.EmitScalarExpr(Allocator); 12242 // According to the standard, the original allocator type is a enum 12243 // (integer). Convert to pointer type, if required. 12244 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12245 CGF.getContext().VoidPtrTy, 12246 Allocator->getExprLoc()); 12247 } else { 12248 // If no allocator specified, it defaults to the null allocator. 
12249 AllocVal = llvm::Constant::getNullValue( 12250 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12251 } 12252 return AllocVal; 12253 } 12254 12255 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12256 const VarDecl *VD) { 12257 if (!VD) 12258 return Address::invalid(); 12259 Address UntiedAddr = Address::invalid(); 12260 Address UntiedRealAddr = Address::invalid(); 12261 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12262 if (It != FunctionToUntiedTaskStackMap.end()) { 12263 const UntiedLocalVarsAddressesMap &UntiedData = 12264 UntiedLocalVarsStack[It->second]; 12265 auto I = UntiedData.find(VD); 12266 if (I != UntiedData.end()) { 12267 UntiedAddr = I->second.first; 12268 UntiedRealAddr = I->second.second; 12269 } 12270 } 12271 const VarDecl *CVD = VD->getCanonicalDecl(); 12272 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12273 // Use the default allocation. 12274 if (!isAllocatableDecl(VD)) 12275 return UntiedAddr; 12276 llvm::Value *Size; 12277 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12278 if (CVD->getType()->isVariablyModifiedType()) { 12279 Size = CGF.getTypeSize(CVD->getType()); 12280 // Align the size: ((size + align - 1) / align) * align 12281 Size = CGF.Builder.CreateNUWAdd( 12282 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12283 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12284 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12285 } else { 12286 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12287 Size = CGM.getSize(Sz.alignTo(Align)); 12288 } 12289 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12290 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12291 const Expr *Allocator = AA->getAllocator(); 12292 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12293 llvm::Value *Alignment = 12294 AA->getAlignment() 12295 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()), 12296 CGM.SizeTy, /*isSigned=*/false) 12297 : nullptr; 12298 SmallVector<llvm::Value *, 4> Args; 12299 Args.push_back(ThreadID); 12300 if (Alignment) 12301 Args.push_back(Alignment); 12302 Args.push_back(Size); 12303 Args.push_back(AllocVal); 12304 llvm::omp::RuntimeFunction FnID = 12305 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12306 llvm::Value *Addr = CGF.EmitRuntimeCall( 12307 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12308 getName({CVD->getName(), ".void.addr"})); 12309 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12310 CGM.getModule(), OMPRTL___kmpc_free); 12311 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12312 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12313 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12314 if (UntiedAddr.isValid()) 12315 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12316 12317 // Cleanup action for allocate support. 
12318 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12319 llvm::FunctionCallee RTLFn; 12320 SourceLocation::UIntTy LocEncoding; 12321 Address Addr; 12322 const Expr *AllocExpr; 12323 12324 public: 12325 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12326 SourceLocation::UIntTy LocEncoding, Address Addr, 12327 const Expr *AllocExpr) 12328 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12329 AllocExpr(AllocExpr) {} 12330 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12331 if (!CGF.HaveInsertPoint()) 12332 return; 12333 llvm::Value *Args[3]; 12334 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12335 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12336 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12337 Addr.getPointer(), CGF.VoidPtrTy); 12338 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12339 Args[2] = AllocVal; 12340 CGF.EmitRuntimeCall(RTLFn, Args); 12341 } 12342 }; 12343 Address VDAddr = 12344 UntiedRealAddr.isValid() 12345 ? 
UntiedRealAddr 12346 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); 12347 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12348 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12349 VDAddr, Allocator); 12350 if (UntiedRealAddr.isValid()) 12351 if (auto *Region = 12352 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12353 Region->emitUntiedSwitch(CGF); 12354 return VDAddr; 12355 } 12356 return UntiedAddr; 12357 } 12358 12359 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12360 const VarDecl *VD) const { 12361 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12362 if (It == FunctionToUntiedTaskStackMap.end()) 12363 return false; 12364 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12365 } 12366 12367 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12368 CodeGenModule &CGM, const OMPLoopDirective &S) 12369 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12370 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12371 if (!NeedToPush) 12372 return; 12373 NontemporalDeclsSet &DS = 12374 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12375 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12376 for (const Stmt *Ref : C->private_refs()) { 12377 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12378 const ValueDecl *VD; 12379 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12380 VD = DRE->getDecl(); 12381 } else { 12382 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12383 assert((ME->isImplicitCXXThis() || 12384 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12385 "Expected member of current class."); 12386 VD = ME->getMemberDecl(); 12387 } 12388 DS.insert(VD); 12389 } 12390 } 12391 } 12392 12393 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12394 if (!NeedToPush) 12395 return; 12396 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12397 } 12398 
12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12400 CodeGenFunction &CGF, 12401 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12402 std::pair<Address, Address>> &LocalVars) 12403 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12404 if (!NeedToPush) 12405 return; 12406 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12407 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12408 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12409 } 12410 12411 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12412 if (!NeedToPush) 12413 return; 12414 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12415 } 12416 12417 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12418 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12419 12420 return llvm::any_of( 12421 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12422 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12423 } 12424 12425 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12426 const OMPExecutableDirective &S, 12427 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12428 const { 12429 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12430 // Vars in target/task regions must be excluded completely. 12431 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12432 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12433 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12434 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12435 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12436 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12437 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12438 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12439 } 12440 } 12441 // Exclude vars in private clauses. 
12442 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12443 for (const Expr *Ref : C->varlists()) { 12444 if (!Ref->getType()->isScalarType()) 12445 continue; 12446 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12447 if (!DRE) 12448 continue; 12449 NeedToCheckForLPCs.insert(DRE->getDecl()); 12450 } 12451 } 12452 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12453 for (const Expr *Ref : C->varlists()) { 12454 if (!Ref->getType()->isScalarType()) 12455 continue; 12456 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12457 if (!DRE) 12458 continue; 12459 NeedToCheckForLPCs.insert(DRE->getDecl()); 12460 } 12461 } 12462 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12463 for (const Expr *Ref : C->varlists()) { 12464 if (!Ref->getType()->isScalarType()) 12465 continue; 12466 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12467 if (!DRE) 12468 continue; 12469 NeedToCheckForLPCs.insert(DRE->getDecl()); 12470 } 12471 } 12472 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12473 for (const Expr *Ref : C->varlists()) { 12474 if (!Ref->getType()->isScalarType()) 12475 continue; 12476 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12477 if (!DRE) 12478 continue; 12479 NeedToCheckForLPCs.insert(DRE->getDecl()); 12480 } 12481 } 12482 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12483 for (const Expr *Ref : C->varlists()) { 12484 if (!Ref->getType()->isScalarType()) 12485 continue; 12486 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12487 if (!DRE) 12488 continue; 12489 NeedToCheckForLPCs.insert(DRE->getDecl()); 12490 } 12491 } 12492 for (const Decl *VD : NeedToCheckForLPCs) { 12493 for (const LastprivateConditionalData &Data : 12494 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12495 if (Data.DeclToUniqueName.count(VD) > 0) { 12496 if (!Data.Disabled) 12497 
NeedToAddForLPCsAsDisabled.insert(VD); 12498 break; 12499 } 12500 } 12501 } 12502 } 12503 12504 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12505 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12506 : CGM(CGF.CGM), 12507 Action((CGM.getLangOpts().OpenMP >= 50 && 12508 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12509 [](const OMPLastprivateClause *C) { 12510 return C->getKind() == 12511 OMPC_LASTPRIVATE_conditional; 12512 })) 12513 ? ActionToDo::PushAsLastprivateConditional 12514 : ActionToDo::DoNotPush) { 12515 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12516 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12517 return; 12518 assert(Action == ActionToDo::PushAsLastprivateConditional && 12519 "Expected a push action."); 12520 LastprivateConditionalData &Data = 12521 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12522 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12523 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12524 continue; 12525 12526 for (const Expr *Ref : C->varlists()) { 12527 Data.DeclToUniqueName.insert(std::make_pair( 12528 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12529 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12530 } 12531 } 12532 Data.IVLVal = IVLVal; 12533 Data.Fn = CGF.CurFn; 12534 } 12535 12536 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12537 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12538 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12539 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12540 if (CGM.getLangOpts().OpenMP < 50) 12541 return; 12542 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12543 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12544 if (!NeedToAddForLPCsAsDisabled.empty()) { 12545 Action = ActionToDo::DisableLastprivateConditional; 12546 
LastprivateConditionalData &Data = 12547 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12548 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12549 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12550 Data.Fn = CGF.CurFn; 12551 Data.Disabled = true; 12552 } 12553 } 12554 12555 CGOpenMPRuntime::LastprivateConditionalRAII 12556 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12557 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12558 return LastprivateConditionalRAII(CGF, S); 12559 } 12560 12561 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12562 if (CGM.getLangOpts().OpenMP < 50) 12563 return; 12564 if (Action == ActionToDo::DisableLastprivateConditional) { 12565 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12566 "Expected list of disabled private vars."); 12567 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12568 } 12569 if (Action == ActionToDo::PushAsLastprivateConditional) { 12570 assert( 12571 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12572 "Expected list of lastprivate conditional vars."); 12573 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12574 } 12575 } 12576 12577 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12578 const VarDecl *VD) { 12579 ASTContext &C = CGM.getContext(); 12580 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12581 if (I == LastprivateConditionalToTypes.end()) 12582 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12583 QualType NewType; 12584 const FieldDecl *VDField; 12585 const FieldDecl *FiredField; 12586 LValue BaseLVal; 12587 auto VI = I->getSecond().find(VD); 12588 if (VI == I->getSecond().end()) { 12589 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12590 RD->startDefinition(); 12591 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12592 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 12593 RD->completeDefinition(); 12594 NewType = C.getRecordType(RD); 12595 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12596 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12597 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12598 } else { 12599 NewType = std::get<0>(VI->getSecond()); 12600 VDField = std::get<1>(VI->getSecond()); 12601 FiredField = std::get<2>(VI->getSecond()); 12602 BaseLVal = std::get<3>(VI->getSecond()); 12603 } 12604 LValue FiredLVal = 12605 CGF.EmitLValueForField(BaseLVal, FiredField); 12606 CGF.EmitStoreOfScalar( 12607 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12608 FiredLVal); 12609 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12610 } 12611 12612 namespace { 12613 /// Checks if the lastprivate conditional variable is referenced in LHS. 12614 class LastprivateConditionalRefChecker final 12615 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12616 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12617 const Expr *FoundE = nullptr; 12618 const Decl *FoundD = nullptr; 12619 StringRef UniqueDeclName; 12620 LValue IVLVal; 12621 llvm::Function *FoundFn = nullptr; 12622 SourceLocation Loc; 12623 12624 public: 12625 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12626 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12627 llvm::reverse(LPM)) { 12628 auto It = D.DeclToUniqueName.find(E->getDecl()); 12629 if (It == D.DeclToUniqueName.end()) 12630 continue; 12631 if (D.Disabled) 12632 return false; 12633 FoundE = E; 12634 FoundD = E->getDecl()->getCanonicalDecl(); 12635 UniqueDeclName = It->second; 12636 IVLVal = D.IVLVal; 12637 FoundFn = D.Fn; 12638 break; 12639 } 12640 return FoundE == E; 12641 } 12642 bool VisitMemberExpr(const MemberExpr *E) { 12643 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12644 return false; 12645 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12646 llvm::reverse(LPM)) { 12647 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12648 if (It == D.DeclToUniqueName.end()) 12649 continue; 12650 if (D.Disabled) 12651 return false; 12652 FoundE = E; 12653 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12654 UniqueDeclName = It->second; 12655 IVLVal = D.IVLVal; 12656 FoundFn = D.Fn; 12657 break; 12658 } 12659 return FoundE == E; 12660 } 12661 bool VisitStmt(const Stmt *S) { 12662 for (const Stmt *Child : S->children()) { 12663 if (!Child) 12664 continue; 12665 if (const auto *E = dyn_cast<Expr>(Child)) 12666 if (!E->isGLValue()) 12667 continue; 12668 if (Visit(Child)) 12669 return true; 12670 } 12671 return false; 12672 } 12673 explicit LastprivateConditionalRefChecker( 12674 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12675 : LPM(LPM) {} 12676 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12677 getFoundData() const { 12678 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12679 } 12680 }; 12681 } // namespace 12682 12683 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12684 LValue IVLVal, 12685 StringRef UniqueDeclName, 12686 LValue LVal, 12687 SourceLocation Loc) { 12688 // Last updated loop counter for the lastprivate conditional var. 12689 // int<xx> last_iv = 0; 12690 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12691 llvm::Constant *LastIV = 12692 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12693 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12694 IVLVal.getAlignment().getAsAlign()); 12695 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12696 12697 // Last value of the lastprivate conditional. 
12698 // decltype(priv_a) last_a; 12699 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12700 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12701 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12702 LValue LastLVal = CGF.MakeAddrLValue( 12703 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12704 12705 // Global loop counter. Required to handle inner parallel-for regions. 12706 // iv 12707 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12708 12709 // #pragma omp critical(a) 12710 // if (last_iv <= iv) { 12711 // last_iv = iv; 12712 // last_a = priv_a; 12713 // } 12714 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12715 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12716 Action.Enter(CGF); 12717 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12718 // (last_iv <= iv) ? Check if the variable is updated and store new 12719 // value in global var. 12720 llvm::Value *CmpRes; 12721 if (IVLVal.getType()->isSignedIntegerType()) { 12722 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12723 } else { 12724 assert(IVLVal.getType()->isUnsignedIntegerType() && 12725 "Loop iteration variable must be integer."); 12726 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12727 } 12728 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12729 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12730 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12731 // { 12732 CGF.EmitBlock(ThenBB); 12733 12734 // last_iv = iv; 12735 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12736 12737 // last_a = priv_a; 12738 switch (CGF.getEvaluationKind(LVal.getType())) { 12739 case TEK_Scalar: { 12740 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12741 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12742 break; 12743 } 12744 case TEK_Complex: { 12745 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12746 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12747 break; 12748 } 12749 case TEK_Aggregate: 12750 llvm_unreachable( 12751 "Aggregates are not supported in lastprivate conditional."); 12752 } 12753 // } 12754 CGF.EmitBranch(ExitBB); 12755 // There is no need to emit line number for unconditional branch. 12756 (void)ApplyDebugLocation::CreateEmpty(CGF); 12757 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12758 }; 12759 12760 if (CGM.getLangOpts().OpenMPSimd) { 12761 // Do not emit as a critical region as no parallel region could be emitted. 12762 RegionCodeGenTy ThenRCG(CodeGen); 12763 ThenRCG(CGF); 12764 } else { 12765 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12766 } 12767 } 12768 12769 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12770 const Expr *LHS) { 12771 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12772 return; 12773 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12774 if (!Checker.Visit(LHS)) 12775 return; 12776 const Expr *FoundE; 12777 const Decl *FoundD; 12778 StringRef UniqueDeclName; 12779 LValue IVLVal; 12780 llvm::Function *FoundFn; 12781 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12782 Checker.getFoundData(); 12783 if (FoundFn != CGF.CurFn) { 12784 // Special codegen for inner parallel regions. 
12785 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12786 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12787 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12788 "Lastprivate conditional is not found in outer region."); 12789 QualType StructTy = std::get<0>(It->getSecond()); 12790 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12791 LValue PrivLVal = CGF.EmitLValue(FoundE); 12792 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12793 PrivLVal.getAddress(CGF), 12794 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12795 CGF.ConvertTypeForMem(StructTy)); 12796 LValue BaseLVal = 12797 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12798 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12799 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12800 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12801 FiredLVal, llvm::AtomicOrdering::Unordered, 12802 /*IsVolatile=*/true, /*isInit=*/false); 12803 return; 12804 } 12805 12806 // Private address of the lastprivate conditional in the current context. 
12807 // priv_a 12808 LValue LVal = CGF.EmitLValue(FoundE); 12809 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12810 FoundE->getExprLoc()); 12811 } 12812 12813 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12814 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12815 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12816 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12817 return; 12818 auto Range = llvm::reverse(LastprivateConditionalStack); 12819 auto It = llvm::find_if( 12820 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12821 if (It == Range.end() || It->Fn != CGF.CurFn) 12822 return; 12823 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12824 assert(LPCI != LastprivateConditionalToTypes.end() && 12825 "Lastprivates must be registered already."); 12826 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12827 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12828 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12829 for (const auto &Pair : It->DeclToUniqueName) { 12830 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12831 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12832 continue; 12833 auto I = LPCI->getSecond().find(Pair.first); 12834 assert(I != LPCI->getSecond().end() && 12835 "Lastprivate must be rehistered already."); 12836 // bool Cmp = priv_a.Fired != 0; 12837 LValue BaseLVal = std::get<3>(I->getSecond()); 12838 LValue FiredLVal = 12839 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12840 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12841 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12842 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12843 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12844 // if (Cmp) { 12845 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12846 CGF.EmitBlock(ThenBB); 
12847 Address Addr = CGF.GetAddrOfLocalVar(VD); 12848 LValue LVal; 12849 if (VD->getType()->isReferenceType()) 12850 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12851 AlignmentSource::Decl); 12852 else 12853 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12854 AlignmentSource::Decl); 12855 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12856 D.getBeginLoc()); 12857 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12858 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12859 // } 12860 } 12861 } 12862 12863 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12864 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12865 SourceLocation Loc) { 12866 if (CGF.getLangOpts().OpenMP < 50) 12867 return; 12868 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12869 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12870 "Unknown lastprivate conditional variable."); 12871 StringRef UniqueName = It->second; 12872 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12873 // The variable was not updated in the region - exit. 
12874 if (!GV) 12875 return; 12876 LValue LPLVal = CGF.MakeAddrLValue( 12877 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12878 PrivLVal.getType().getNonReferenceType()); 12879 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12880 CGF.EmitStoreOfScalar(Res, PrivLVal); 12881 } 12882 12883 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12884 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12886 llvm_unreachable("Not supported in SIMD-only mode"); 12887 } 12888 12889 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12890 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12891 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12892 llvm_unreachable("Not supported in SIMD-only mode"); 12893 } 12894 12895 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12896 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12897 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12898 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12899 bool Tied, unsigned &NumberOfParts) { 12900 llvm_unreachable("Not supported in SIMD-only mode"); 12901 } 12902 12903 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12904 SourceLocation Loc, 12905 llvm::Function *OutlinedFn, 12906 ArrayRef<llvm::Value *> CapturedVars, 12907 const Expr *IfCond, 12908 llvm::Value *NumThreads) { 12909 llvm_unreachable("Not supported in SIMD-only mode"); 12910 } 12911 12912 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12913 CodeGenFunction &CGF, StringRef CriticalName, 12914 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12915 const Expr *Hint) { 12916 llvm_unreachable("Not supported in SIMD-only mode"); 12917 } 12918 12919 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12920 const RegionCodeGenTy &MasterOpGen, 12921 SourceLocation Loc) { 12922 
llvm_unreachable("Not supported in SIMD-only mode"); 12923 } 12924 12925 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12926 const RegionCodeGenTy &MasterOpGen, 12927 SourceLocation Loc, 12928 const Expr *Filter) { 12929 llvm_unreachable("Not supported in SIMD-only mode"); 12930 } 12931 12932 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12933 SourceLocation Loc) { 12934 llvm_unreachable("Not supported in SIMD-only mode"); 12935 } 12936 12937 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12938 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12939 SourceLocation Loc) { 12940 llvm_unreachable("Not supported in SIMD-only mode"); 12941 } 12942 12943 void CGOpenMPSIMDRuntime::emitSingleRegion( 12944 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12945 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12946 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12947 ArrayRef<const Expr *> AssignmentOps) { 12948 llvm_unreachable("Not supported in SIMD-only mode"); 12949 } 12950 12951 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12952 const RegionCodeGenTy &OrderedOpGen, 12953 SourceLocation Loc, 12954 bool IsThreads) { 12955 llvm_unreachable("Not supported in SIMD-only mode"); 12956 } 12957 12958 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12959 SourceLocation Loc, 12960 OpenMPDirectiveKind Kind, 12961 bool EmitChecks, 12962 bool ForceSimpleCall) { 12963 llvm_unreachable("Not supported in SIMD-only mode"); 12964 } 12965 12966 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12967 CodeGenFunction &CGF, SourceLocation Loc, 12968 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12969 bool Ordered, const DispatchRTInput &DispatchValues) { 12970 llvm_unreachable("Not supported in SIMD-only mode"); 12971 } 12972 12973 void CGOpenMPSIMDRuntime::emitForStaticInit( 12974 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12975 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12976 llvm_unreachable("Not supported in SIMD-only mode"); 12977 } 12978 12979 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12980 CodeGenFunction &CGF, SourceLocation Loc, 12981 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12982 llvm_unreachable("Not supported in SIMD-only mode"); 12983 } 12984 12985 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12986 SourceLocation Loc, 12987 unsigned IVSize, 12988 bool IVSigned) { 12989 llvm_unreachable("Not supported in SIMD-only mode"); 12990 } 12991 12992 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12993 SourceLocation Loc, 12994 OpenMPDirectiveKind DKind) { 12995 llvm_unreachable("Not supported in SIMD-only mode"); 12996 } 12997 12998 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12999 SourceLocation Loc, 13000 unsigned IVSize, bool IVSigned, 13001 Address IL, Address LB, 13002 Address UB, Address ST) { 13003 llvm_unreachable("Not supported in SIMD-only mode"); 13004 } 13005 13006 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 13007 llvm::Value *NumThreads, 13008 SourceLocation Loc) { 13009 llvm_unreachable("Not supported in SIMD-only mode"); 13010 } 13011 13012 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13013 ProcBindKind ProcBind, 13014 SourceLocation Loc) { 13015 llvm_unreachable("Not supported in SIMD-only mode"); 13016 } 13017 13018 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13019 const VarDecl *VD, 13020 Address VDAddr, 13021 SourceLocation Loc) { 13022 llvm_unreachable("Not supported in SIMD-only mode"); 13023 } 13024 13025 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13026 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13027 CodeGenFunction *CGF) { 13028 llvm_unreachable("Not supported in SIMD-only mode"); 
13029 } 13030 13031 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13032 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13033 llvm_unreachable("Not supported in SIMD-only mode"); 13034 } 13035 13036 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13037 ArrayRef<const Expr *> Vars, 13038 SourceLocation Loc, 13039 llvm::AtomicOrdering AO) { 13040 llvm_unreachable("Not supported in SIMD-only mode"); 13041 } 13042 13043 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13044 const OMPExecutableDirective &D, 13045 llvm::Function *TaskFunction, 13046 QualType SharedsTy, Address Shareds, 13047 const Expr *IfCond, 13048 const OMPTaskDataTy &Data) { 13049 llvm_unreachable("Not supported in SIMD-only mode"); 13050 } 13051 13052 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13053 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13054 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13055 const Expr *IfCond, const OMPTaskDataTy &Data) { 13056 llvm_unreachable("Not supported in SIMD-only mode"); 13057 } 13058 13059 void CGOpenMPSIMDRuntime::emitReduction( 13060 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13061 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13062 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13063 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13064 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13065 ReductionOps, Options); 13066 } 13067 13068 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13069 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13070 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13071 llvm_unreachable("Not supported in SIMD-only mode"); 13072 } 13073 13074 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13075 SourceLocation Loc, 13076 bool 
IsWorksharingReduction) { 13077 llvm_unreachable("Not supported in SIMD-only mode"); 13078 } 13079 13080 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13081 SourceLocation Loc, 13082 ReductionCodeGen &RCG, 13083 unsigned N) { 13084 llvm_unreachable("Not supported in SIMD-only mode"); 13085 } 13086 13087 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13088 SourceLocation Loc, 13089 llvm::Value *ReductionsPtr, 13090 LValue SharedLVal) { 13091 llvm_unreachable("Not supported in SIMD-only mode"); 13092 } 13093 13094 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13095 SourceLocation Loc, 13096 const OMPTaskDataTy &Data) { 13097 llvm_unreachable("Not supported in SIMD-only mode"); 13098 } 13099 13100 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13101 CodeGenFunction &CGF, SourceLocation Loc, 13102 OpenMPDirectiveKind CancelRegion) { 13103 llvm_unreachable("Not supported in SIMD-only mode"); 13104 } 13105 13106 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13107 SourceLocation Loc, const Expr *IfCond, 13108 OpenMPDirectiveKind CancelRegion) { 13109 llvm_unreachable("Not supported in SIMD-only mode"); 13110 } 13111 13112 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13113 const OMPExecutableDirective &D, StringRef ParentName, 13114 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13115 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13116 llvm_unreachable("Not supported in SIMD-only mode"); 13117 } 13118 13119 void CGOpenMPSIMDRuntime::emitTargetCall( 13120 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13121 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13122 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13123 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13124 const OMPLoopDirective &D)> 13125 SizeEmitter) { 13126 llvm_unreachable("Not supported in SIMD-only mode"); 13127 } 13128 
13129 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13130 llvm_unreachable("Not supported in SIMD-only mode"); 13131 } 13132 13133 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13134 llvm_unreachable("Not supported in SIMD-only mode"); 13135 } 13136 13137 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13138 return false; 13139 } 13140 13141 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13142 const OMPExecutableDirective &D, 13143 SourceLocation Loc, 13144 llvm::Function *OutlinedFn, 13145 ArrayRef<llvm::Value *> CapturedVars) { 13146 llvm_unreachable("Not supported in SIMD-only mode"); 13147 } 13148 13149 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13150 const Expr *NumTeams, 13151 const Expr *ThreadLimit, 13152 SourceLocation Loc) { 13153 llvm_unreachable("Not supported in SIMD-only mode"); 13154 } 13155 13156 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13157 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13158 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13159 llvm_unreachable("Not supported in SIMD-only mode"); 13160 } 13161 13162 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13163 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13164 const Expr *Device) { 13165 llvm_unreachable("Not supported in SIMD-only mode"); 13166 } 13167 13168 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13169 const OMPLoopDirective &D, 13170 ArrayRef<Expr *> NumIterations) { 13171 llvm_unreachable("Not supported in SIMD-only mode"); 13172 } 13173 13174 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13175 const OMPDependClause *C) { 13176 llvm_unreachable("Not supported in SIMD-only mode"); 13177 } 13178 13179 const VarDecl * 13180 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13181 const VarDecl *NativeParam) const { 13182 llvm_unreachable("Not 
supported in SIMD-only mode"); 13183 } 13184 13185 Address 13186 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13187 const VarDecl *NativeParam, 13188 const VarDecl *TargetParam) const { 13189 llvm_unreachable("Not supported in SIMD-only mode"); 13190 } 13191