//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
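        // A sketch of the dispatch this builds (assuming, for illustration,
        // that one extra switch point is later added via emitUntiedSwitch):
        //   switch (*part_id) {
        //   case 0: goto .untied.jmp.0;   // initial entry, start of the body
        //   case 1: goto .untied.jmp.1;   // resume after the first task part
        //   default: goto .untied.done.;  // finished; exit through cleanups
        //   }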
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
  /// Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
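    // The new region info chains to the enclosing CapturedStmtInfo (kept as
    // OldCSI); the destructor below restores it, so these RAII objects nest.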
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
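// Worked example of the OpenMPLocationFlags above (illustrative, not an
// exhaustive list of combinations): the implicit barrier at the end of a
// worksharing 'for' is described as
// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR, i.e. 0x02 | 0x40 = 0x42 in the
// ident_t 'flags' field.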
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                                  The string is composed of semi-colon
///                                  separated fields which describe the source
///                                  file, the function and a pair of line
///                                  numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
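// Worked example of the encoding above: 'schedule(monotonic: dynamic)' is
// passed to the runtime as OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic,
// i.e. 35 | (1 << 29) = 0x20000023.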
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check whether the combiner is a call to a UDR combiner and, if so, return
/// the UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
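  // The overall shape of the loop emitted above and completed below is,
  // as a sketch of the generated IR (names abbreviated):
  //   omp.arrayinit.body:
  //     %dest = phi ptr [ %begin, %entry ], [ %dest.next, %omp.arrayinit.body ]
  //     ...initialize the element at %dest...
  //     %dest.next = getelementptr %dest, 1
  //     %done = icmp eq ptr %dest.next, %end
  //     br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body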
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in the current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
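    // The pointer difference above counts the elements strictly between the
    // section's bounds; the +1 below accounts for the upper bound itself,
    // since an OpenMP array section includes its last element.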
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
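    // Adjustment is the (signed) element offset from the reduced section
    // element back to the base of the original variable; applying the same
    // offset to the private copy below rebases the private address so it can
    // stand in for the original base.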
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
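  // For example (an illustrative declaration, with 'myadd' and 'T' standing
  // in for a user's names), given
  //   #pragma omp declare reduction(myadd : T : omp_out += omp_in)
  // the combiner is emitted as
  //   void .omp_combiner.(T *restrict omp_out, T *restrict omp_in)
  // and references to omp_in/omp_out in the combiner expression are rewritten
  // to the pointees of the two parameters via the OMPPrivateScope below.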
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means that if we have variables to finalize, we split the
    // block at IP, use the new block (=BB) as the destination to build a
    // JumpDest (via getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
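  // Note the contrast with emitParallelOrTeamsOutlinedFunction above: a
  // parallel outlined function receives the global thread id by pointer
  // (kmp_int32 *), while a task entry receives it by value (kmp_int32).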
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
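// For example (illustrative values only), a location at line 12, column 3 of
// test.c inside foo() produces the ident string ";test.c;foo;12;3;;".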
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1449 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1450 if (!Elem.second.ServiceInsertPt) 1451 setLocThreadIdInsertPt(CGF); 1452 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1453 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1454 llvm::CallInst *Call = CGF.Builder.CreateCall( 1455 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1456 OMPRTL___kmpc_global_thread_num), 1457 emitUpdateLocation(CGF, Loc)); 1458 Call->setCallingConv(CGF.getRuntimeCC()); 1459 Elem.second.ThreadID = Call; 1460 return Call; 1461 } 1462 1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1464 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1466 clearLocThreadIdInsertPt(CGF); 1467 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1468 } 1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1470 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1471 UDRMap.erase(D); 1472 FunctionUDRMap.erase(CGF.CurFn); 1473 } 1474 auto I = FunctionUDMMap.find(CGF.CurFn); 1475 if (I != FunctionUDMMap.end()) { 1476 for(const auto *D : I->second) 1477 UDMMap.erase(D); 1478 FunctionUDMMap.erase(I); 1479 } 1480 LastprivateConditionalToTypes.erase(CGF.CurFn); 1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1482 } 1483 1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1485 return OMPBuilder.IdentPtr; 1486 } 1487 1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1489 if (!Kmpc_MicroTy) { 1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1492 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1494 } 1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1496 } 1497 1498 llvm::FunctionCallee 1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1500 bool IsGPUDistribute) { 1501 assert((IVSize == 32 || IVSize == 64) && 1502 "IV size is not compatible with the omp runtime"); 1503 StringRef Name; 1504 if (IsGPUDistribute) 1505 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1506 : "__kmpc_distribute_static_init_4u") 1507 : (IVSigned ? "__kmpc_distribute_static_init_8" 1508 : "__kmpc_distribute_static_init_8u"); 1509 else 1510 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1511 : "__kmpc_for_static_init_4u") 1512 : (IVSigned ? "__kmpc_for_static_init_8" 1513 : "__kmpc_for_static_init_8u"); 1514 1515 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1516 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1517 llvm::Type *TypeParams[] = { 1518 getIdentTyPointerTy(), // loc 1519 CGM.Int32Ty, // tid 1520 CGM.Int32Ty, // schedtype 1521 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1522 PtrTy, // p_lower 1523 PtrTy, // p_upper 1524 PtrTy, // p_stride 1525 ITy, // incr 1526 ITy // chunk 1527 }; 1528 auto *FnTy = 1529 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1530 return CGM.CreateRuntimeFunction(FnTy, Name); 1531 } 1532 1533 llvm::FunctionCallee 1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1535 assert((IVSize == 32 || IVSize == 64) && 1536 "IV size is not compatible with the omp runtime"); 1537 StringRef Name = 1538 IVSize == 32 1539 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1540 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1541 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1542 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1543 CGM.Int32Ty, // tid 1544 CGM.Int32Ty, // schedtype 1545 ITy, // lower 1546 ITy, // upper 1547 ITy, // stride 1548 ITy // chunk 1549 }; 1550 auto *FnTy = 1551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1552 return CGM.CreateRuntimeFunction(FnTy, Name); 1553 } 1554 1555 llvm::FunctionCallee 1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1557 assert((IVSize == 32 || IVSize == 64) && 1558 "IV size is not compatible with the omp runtime"); 1559 StringRef Name = 1560 IVSize == 32 1561 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1562 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1563 llvm::Type *TypeParams[] = { 1564 getIdentTyPointerTy(), // loc 1565 CGM.Int32Ty, // tid 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1579 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1582 llvm::Type *TypeParams[] = { 1583 getIdentTyPointerTy(), // loc 1584 CGM.Int32Ty, // tid 1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1586 PtrTy, // p_lower 1587 PtrTy, // p_upper 1588 PtrTy // p_stride 1589 }; 1590 auto *FnTy = 1591 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1592 return CGM.CreateRuntimeFunction(FnTy, Name); 1593 } 1594 1595 /// Obtain information that uniquely identifies a target entry. This 1596 /// consists of the file and device IDs as well as line number associated with 1597 /// the relevant entry source location. 
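/// For example (illustrative values only): an entry at line 42 of a file with
/// filesystem UniqueID {device 0x803, file 0x1d2c} produces DeviceID = 0x803,
/// FileID = 0x1d2c, LineNum = 42; callers then format these into names such
/// as __omp_offloading_803_1d2c_<parent>_l42.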
1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1599 unsigned &DeviceID, unsigned &FileID, 1600 unsigned &LineNum) { 1601 SourceManager &SM = C.getSourceManager(); 1602 1603 // The loc should always be valid and have a file ID (the user cannot use 1604 // #pragma directives in macros). 1605 1606 assert(Loc.isValid() && "Source location is expected to be always valid."); 1607 1608 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1609 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1610 1611 llvm::sys::fs::UniqueID ID; 1612 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1613 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1614 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1615 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1616 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1617 << PLoc.getFilename() << EC.message(); 1618 } 1619 1620 DeviceID = ID.getDevice(); 1621 FileID = ID.getFile(); 1622 LineNum = PLoc.getLine(); 1623 } 1624 1625 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1626 if (CGM.getLangOpts().OpenMPSimd) 1627 return Address::invalid(); 1628 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1629 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1630 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1631 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1632 HasRequiresUnifiedSharedMemory))) { 1633 SmallString<64> PtrName; 1634 { 1635 llvm::raw_svector_ostream OS(PtrName); 1636 OS << CGM.getMangledName(GlobalDecl(VD)); 1637 if (!VD->isExternallyVisible()) { 1638 unsigned DeviceID, FileID, Line; 1639 getTargetEntryUniqueInfo(CGM.getContext(), 1640 VD->getCanonicalDecl()->getBeginLoc(), 1641 DeviceID, FileID, Line); 1642 OS << llvm::format("_%x", FileID); 1643 } 1644 OS << "_decl_tgt_ref_ptr"; 1645 } 1646 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1647 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1648 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); 1649 if (!Ptr) { 1650 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); 1651 1652 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1653 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1654 1655 if (!CGM.getLangOpts().OpenMPIsDevice) 1656 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1657 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1658 } 1659 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1660 } 1661 return Address::invalid(); 1662 } 1663 1664 llvm::Constant * 1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1666 assert(!CGM.getLangOpts().OpenMPUseTLS || 1667 !CGM.getContext().getTargetInfo().isTLSSupported()); 1668 // Lookup the entry, lazily creating it if necessary.
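// As a sketch (the exact mangling comes from getName): a variable whose
// mangled name is "gvar" gets an internal cache global named "gvar.cache.",
// which is handed to __kmpc_threadprivate_cached in getAddrOfThreadPrivate.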
1669 std::string Suffix = getName({"cache", ""}); 1670 return getOrCreateInternalVariable( 1671 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1672 } 1673 1674 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1675 const VarDecl *VD, 1676 Address VDAddr, 1677 SourceLocation Loc) { 1678 if (CGM.getLangOpts().OpenMPUseTLS && 1679 CGM.getContext().getTargetInfo().isTLSSupported()) 1680 return VDAddr; 1681 1682 llvm::Type *VarTy = VDAddr.getElementType(); 1683 llvm::Value *Args[] = { 1684 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1685 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1686 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1687 getOrCreateThreadPrivateCache(VD)}; 1688 return Address( 1689 CGF.EmitRuntimeCall( 1690 OMPBuilder.getOrCreateRuntimeFunction( 1691 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1692 Args), 1693 CGF.Int8Ty, VDAddr.getAlignment()); 1694 } 1695 1696 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1697 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1698 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1699 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1700 // library. 1701 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1702 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1703 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1704 OMPLoc); 1705 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1706 // to register constructor/destructor for variable. 1707 llvm::Value *Args[] = { 1708 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1709 Ctor, CopyCtor, Dtor}; 1710 CGF.EmitRuntimeCall( 1711 OMPBuilder.getOrCreateRuntimeFunction( 1712 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1713 Args); 1714 } 1715 1716 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1717 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1718 bool PerformInit, CodeGenFunction *CGF) { 1719 if (CGM.getLangOpts().OpenMPUseTLS && 1720 CGM.getContext().getTargetInfo().isTLSSupported()) 1721 return nullptr; 1722 1723 VD = VD->getDefinition(CGM.getContext()); 1724 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1725 QualType ASTTy = VD->getType(); 1726 1727 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1728 const Expr *Init = VD->getAnyInitializer(); 1729 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1730 // Generate function that re-emits the declaration's initializer into the 1731 // threadprivate copy of the variable VD 1732 CodeGenFunction CtorCGF(CGM); 1733 FunctionArgList Args; 1734 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1735 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1736 ImplicitParamDecl::Other); 1737 Args.push_back(&Dst); 1738 1739 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1740 CGM.getContext().VoidPtrTy, Args); 1741 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1742 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1743 llvm::Function *Fn = 1744 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1745 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1746 Args, Loc, Loc); 1747 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1748 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1749 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1750 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); 1751 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1752 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1753 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1754 /*IsInitializer=*/true); 1755 ArgVal = CtorCGF.EmitLoadOfScalar( 1756 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1757 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1758 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1759 CtorCGF.FinishFunction(); 1760 Ctor = Fn; 1761 } 1762 if (VD->getType().isDestructedType() != QualType::DK_none) { 1763 // Generate a function that emits a destructor call for the threadprivate 1764 // copy of the variable VD. 1765 CodeGenFunction DtorCGF(CGM); 1766 FunctionArgList Args; 1767 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1768 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1769 ImplicitParamDecl::Other); 1770 Args.push_back(&Dst); 1771 1772 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1773 CGM.getContext().VoidTy, Args); 1774 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1775 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1776 llvm::Function *Fn = 1777 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1778 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1779 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1780 Loc, Loc); 1781 // Create a scope with an artificial location for the body of this function. 1782 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1783 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1784 DtorCGF.GetAddrOfLocalVar(&Dst), 1785 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1786 DtorCGF.emitDestroy( 1787 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, 1788 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1789 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1790 DtorCGF.FinishFunction(); 1791 Dtor = Fn; 1792 } 1793 // Do not emit the init function if it is not required. 1794 if (!Ctor && !Dtor) 1795 return nullptr; 1796 1797 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1798 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1799 /*isVarArg=*/false) 1800 ->getPointerTo(); 1801 // Copying constructor for the threadprivate variable. 1802 // Must be NULL: this parameter is reserved by the runtime, which currently 1803 // requires it to always be NULL and asserts otherwise.
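// Schematically, the registration emitted via emitThreadPrivateVarInit is
// (a sketch, not the exact signatures):
//   __kmpc_global_thread_num(&loc);
//   __kmpc_threadprivate_register(&loc, &var, ctor-or-NULL, NULL, dtor-or-NULL);
// The null constants created below stand in for the omitted hooks.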
1804 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1805 if (Ctor == nullptr) { 1806 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1807 /*isVarArg=*/false) 1808 ->getPointerTo(); 1809 Ctor = llvm::Constant::getNullValue(CtorTy); 1810 } 1811 if (Dtor == nullptr) { 1812 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1813 /*isVarArg=*/false) 1814 ->getPointerTo(); 1815 Dtor = llvm::Constant::getNullValue(DtorTy); 1816 } 1817 if (!CGF) { 1818 auto *InitFunctionTy = 1819 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1820 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1821 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1822 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1823 CodeGenFunction InitCGF(CGM); 1824 FunctionArgList ArgList; 1825 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1826 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1827 Loc, Loc); 1828 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1829 InitCGF.FinishFunction(); 1830 return InitFunction; 1831 } 1832 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1833 } 1834 return nullptr; 1835 } 1836 1837 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1838 llvm::GlobalVariable *Addr, 1839 bool PerformInit) { 1840 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1841 !CGM.getLangOpts().OpenMPIsDevice) 1842 return false; 1843 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1844 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1845 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1846 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1847 HasRequiresUnifiedSharedMemory)) 1848 return CGM.getLangOpts().OpenMPIsDevice; 1849 VD = VD->getDefinition(CGM.getContext()); 1850 assert(VD && "Unknown VarDecl"); 1851 1852 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1853 return CGM.getLangOpts().OpenMPIsDevice; 1854 1855 QualType ASTTy = VD->getType(); 1856 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1857 1858 // Produce the unique prefix to identify the new target regions. We use 1859 // the source location of the variable declaration, which we know does not 1860 // conflict with any target region.
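// E.g. (illustrative values): a variable 'gvar' declared at line 5 of a file
// with DeviceID 0x10 and FileID 0xabc gets the prefix
// __omp_offloading_10_abc_gvar_l5, to which "_ctor"/"_dtor" is appended below.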
1861 unsigned DeviceID; 1862 unsigned FileID; 1863 unsigned Line; 1864 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1865 SmallString<128> Buffer, Out; 1866 { 1867 llvm::raw_svector_ostream OS(Buffer); 1868 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1869 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1870 } 1871 1872 const Expr *Init = VD->getAnyInitializer(); 1873 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1874 llvm::Constant *Ctor; 1875 llvm::Constant *ID; 1876 if (CGM.getLangOpts().OpenMPIsDevice) { 1877 // Generate function that re-emits the declaration's initializer into 1878 // the threadprivate copy of the variable VD 1879 CodeGenFunction CtorCGF(CGM); 1880 1881 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1882 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1883 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1884 FTy, Twine(Buffer, "_ctor"), FI, Loc, false, 1885 llvm::GlobalValue::WeakODRLinkage); 1886 if (CGM.getTriple().isAMDGCN()) 1887 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 1888 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1889 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1890 FunctionArgList(), Loc, Loc); 1891 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1892 llvm::Constant *AddrInAS0 = Addr; 1893 if (Addr->getAddressSpace() != 0) 1894 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1895 Addr, llvm::PointerType::getWithSamePointeeType( 1896 cast<llvm::PointerType>(Addr->getType()), 0)); 1897 CtorCGF.EmitAnyExprToMem(Init, 1898 Address(AddrInAS0, Addr->getValueType(), 1899 CGM.getContext().getDeclAlign(VD)), 1900 Init->getType().getQualifiers(), 1901 /*IsInitializer=*/true); 1902 CtorCGF.FinishFunction(); 1903 Ctor = Fn; 1904 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1905 } else { 1906 Ctor = new llvm::GlobalVariable( 1907 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1908 llvm::GlobalValue::PrivateLinkage, 1909 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1910 ID = Ctor; 1911 } 1912 1913 // Register the information for the entry associated with the constructor. 1914 Out.clear(); 1915 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1916 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1917 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1918 } 1919 if (VD->getType().isDestructedType() != QualType::DK_none) { 1920 llvm::Constant *Dtor; 1921 llvm::Constant *ID; 1922 if (CGM.getLangOpts().OpenMPIsDevice) { 1923 // Generate function that emits destructor call for the threadprivate 1924 // copy of the variable VD 1925 CodeGenFunction DtorCGF(CGM); 1926 1927 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1928 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1929 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1930 FTy, Twine(Buffer, "_dtor"), FI, Loc, false, 1931 llvm::GlobalValue::WeakODRLinkage); 1932 if (CGM.getTriple().isAMDGCN()) 1933 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 1934 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1935 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1936 FunctionArgList(), Loc, Loc); 1937 // Create a scope with an artificial location for the body of this 1938 // function. 
1939 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1940 llvm::Constant *AddrInAS0 = Addr; 1941 if (Addr->getAddressSpace() != 0) 1942 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1943 Addr, llvm::PointerType::getWithSamePointeeType( 1944 cast<llvm::PointerType>(Addr->getType()), 0)); 1945 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), 1946 CGM.getContext().getDeclAlign(VD)), 1947 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1948 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1949 DtorCGF.FinishFunction(); 1950 Dtor = Fn; 1951 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1952 } else { 1953 Dtor = new llvm::GlobalVariable( 1954 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1955 llvm::GlobalValue::PrivateLinkage, 1956 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1957 ID = Dtor; 1958 } 1959 // Register the information for the entry associated with the destructor. 1960 Out.clear(); 1961 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1962 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1963 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1964 } 1965 return CGM.getLangOpts().OpenMPIsDevice; 1966 } 1967 1968 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1969 QualType VarType, 1970 StringRef Name) { 1971 std::string Suffix = getName({"artificial", ""}); 1972 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1973 llvm::GlobalVariable *GAddr = 1974 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1975 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1976 CGM.getTarget().isTLSSupported()) { 1977 GAddr->setThreadLocal(/*Val=*/true); 1978 return Address(GAddr, GAddr->getValueType(), 1979 CGM.getContext().getTypeAlignInChars(VarType)); 1980 } 1981 std::string CacheSuffix = getName({"cache", ""}); 1982 llvm::Value *Args[] = { 1983 emitUpdateLocation(CGF, SourceLocation()), 1984 getThreadID(CGF, SourceLocation()), 1985 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 1986 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 1987 /*isSigned=*/false), 1988 getOrCreateInternalVariable( 1989 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 1990 return Address( 1991 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1992 CGF.EmitRuntimeCall( 1993 OMPBuilder.getOrCreateRuntimeFunction( 1994 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1995 Args), 1996 VarLVType->getPointerTo(/*AddrSpace=*/0)), 1997 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 1998 } 1999 2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2001 const RegionCodeGenTy &ThenGen, 2002 const RegionCodeGenTy &ElseGen) { 2003 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2004 2005 // If the condition constant folds and can be elided, try to avoid emitting 2006 // the condition and the dead arm of the if/else. 2007 bool CondConstant; 2008 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2009 if (CondConstant) 2010 ThenGen(CGF); 2011 else 2012 ElseGen(CGF); 2013 return; 2014 } 2015 2016 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2017 // emit the conditional branch.
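// The emitted control flow has this shape (a sketch, not the exact IR):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:  <ThenGen>; br label %omp_if.end
// omp_if.else:  <ElseGen>; br label %omp_if.end
// omp_if.end: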
2018 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2019 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2020 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2021 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2022 2023 // Emit the 'then' code. 2024 CGF.EmitBlock(ThenBlock); 2025 ThenGen(CGF); 2026 CGF.EmitBranch(ContBlock); 2027 // Emit the 'else' code if present. 2028 // There is no need to emit a line number for the unconditional branch. 2029 (void)ApplyDebugLocation::CreateEmpty(CGF); 2030 CGF.EmitBlock(ElseBlock); 2031 ElseGen(CGF); 2032 // There is no need to emit a line number for the unconditional branch. 2033 (void)ApplyDebugLocation::CreateEmpty(CGF); 2034 CGF.EmitBranch(ContBlock); 2035 // Emit the continuation block for code after the if. 2036 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2037 } 2038 2039 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2040 llvm::Function *OutlinedFn, 2041 ArrayRef<llvm::Value *> CapturedVars, 2042 const Expr *IfCond, 2043 llvm::Value *NumThreads) { 2044 if (!CGF.HaveInsertPoint()) 2045 return; 2046 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2047 auto &M = CGM.getModule(); 2048 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2049 this](CodeGenFunction &CGF, PrePostActionTy &) { 2050 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2051 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2052 llvm::Value *Args[] = { 2053 RTLoc, 2054 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2055 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2056 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2057 RealArgs.append(std::begin(Args), std::end(Args)); 2058 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2059 2060 llvm::FunctionCallee RTLFn = 2061 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2062 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2063 }; 2064 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2065 this](CodeGenFunction &CGF, PrePostActionTy &) { 2066 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2067 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2068 // Build calls: 2069 // __kmpc_serialized_parallel(&Loc, GTid); 2070 llvm::Value *Args[] = {RTLoc, ThreadID}; 2071 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2072 M, OMPRTL___kmpc_serialized_parallel), 2073 Args); 2074 2075 // OutlinedFn(&gtid, &zero_bound, CapturedStruct); 2076 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2077 Address ZeroAddrBound = 2078 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2079 /*Name=*/".bound.zero.addr"); 2080 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2081 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2082 // ThreadId for serialized parallels is 0. 2083 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2084 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2085 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2086 2087 // Ensure we do not inline the function. This is trivially true for the ones 2088 // passed to __kmpc_fork_call, but the ones called in serialized regions 2089 // could be inlined. This is not perfect, but it is closer to the invariant 2090 // we want, namely, every data environment starts with a new function. 2091 // TODO: We should pass the if condition to the runtime function and do the 2092 // handling there. Much cleaner code.
2093 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2094 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2095 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2096 2097 // __kmpc_end_serialized_parallel(&Loc, GTid); 2098 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2099 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2100 M, OMPRTL___kmpc_end_serialized_parallel), 2101 EndArgs); 2102 }; 2103 if (IfCond) { 2104 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2105 } else { 2106 RegionCodeGenTy ThenRCG(ThenGen); 2107 ThenRCG(CGF); 2108 } 2109 } 2110 2111 // If we're inside an (outlined) parallel region, use the region info's 2112 // thread-ID variable (it is passed to the outlined function as its first 2113 // argument, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel 2114 // region but in a regular serial code region, get the thread ID by calling 2115 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a 2116 // temporary, and return the address of that temp. 2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2118 SourceLocation Loc) { 2119 if (auto *OMPRegionInfo = 2120 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2121 if (OMPRegionInfo->getThreadIDVariable()) 2122 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2123 2124 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2125 QualType Int32Ty = 2126 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2127 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2128 CGF.EmitStoreOfScalar(ThreadID, 2129 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2130 2131 return ThreadIDTemp; 2132 } 2133 2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2135 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2136 SmallString<256> Buffer; 2137 llvm::raw_svector_ostream Out(Buffer); 2138 Out << Name; 2139 StringRef RuntimeName = Out.str(); 2140 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2141 if (Elem.second) { 2142 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2143 "OMP internal variable has different type than requested"); 2144 return &*Elem.second; 2145 } 2146 2147 return Elem.second = new llvm::GlobalVariable( 2148 CGM.getModule(), Ty, /*IsConstant*/ false, 2149 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2150 Elem.first(), /*InsertBefore=*/nullptr, 2151 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2152 } 2153 2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2155 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2156 std::string Name = getName({Prefix, "var"}); 2157 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2158 } 2159 2160 namespace { 2161 /// Common pre(post)-action for different OpenMP constructs.
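/// Typical usage, sketched (see e.g. emitMasterRegion below):
///   CommonActionTy Action(EnterFn, Args, ExitFn, Args, /*Conditional=*/true);
///   RegionGen.setAction(Action);
///   emitInlinedDirective(CGF, Kind, RegionGen);
///   Action.Done(CGF);
/// With Conditional == true the region body only runs when the enter call
/// returns a nonzero value.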
2162 class CommonActionTy final : public PrePostActionTy { 2163 llvm::FunctionCallee EnterCallee; 2164 ArrayRef<llvm::Value *> EnterArgs; 2165 llvm::FunctionCallee ExitCallee; 2166 ArrayRef<llvm::Value *> ExitArgs; 2167 bool Conditional; 2168 llvm::BasicBlock *ContBlock = nullptr; 2169 2170 public: 2171 CommonActionTy(llvm::FunctionCallee EnterCallee, 2172 ArrayRef<llvm::Value *> EnterArgs, 2173 llvm::FunctionCallee ExitCallee, 2174 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2175 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2176 ExitArgs(ExitArgs), Conditional(Conditional) {} 2177 void Enter(CodeGenFunction &CGF) override { 2178 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2179 if (Conditional) { 2180 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2181 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2182 ContBlock = CGF.createBasicBlock("omp_if.end"); 2183 // Generate the branch (If-stmt) 2184 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2185 CGF.EmitBlock(ThenBlock); 2186 } 2187 } 2188 void Done(CodeGenFunction &CGF) { 2189 // Emit the rest of blocks/branches 2190 CGF.EmitBranch(ContBlock); 2191 CGF.EmitBlock(ContBlock, true); 2192 } 2193 void Exit(CodeGenFunction &CGF) override { 2194 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2195 } 2196 }; 2197 } // anonymous namespace 2198 2199 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2200 StringRef CriticalName, 2201 const RegionCodeGenTy &CriticalOpGen, 2202 SourceLocation Loc, const Expr *Hint) { 2203 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2204 // CriticalOpGen(); 2205 // __kmpc_end_critical(ident_t *, gtid, Lock); 2206 // Prepare arguments and build a call to __kmpc_critical 2207 if (!CGF.HaveInsertPoint()) 2208 return; 2209 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2210 getCriticalRegionLock(CriticalName)}; 2211 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2212 std::end(Args)); 2213 if (Hint) { 2214 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2215 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2216 } 2217 CommonActionTy Action( 2218 OMPBuilder.getOrCreateRuntimeFunction( 2219 CGM.getModule(), 2220 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2221 EnterArgs, 2222 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2223 OMPRTL___kmpc_end_critical), 2224 Args); 2225 CriticalOpGen.setAction(Action); 2226 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2227 } 2228 2229 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2230 const RegionCodeGenTy &MasterOpGen, 2231 SourceLocation Loc) { 2232 if (!CGF.HaveInsertPoint()) 2233 return; 2234 // if(__kmpc_master(ident_t *, gtid)) { 2235 // MasterOpGen(); 2236 // __kmpc_end_master(ident_t *, gtid); 2237 // } 2238 // Prepare arguments and build a call to __kmpc_master 2239 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2240 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2241 CGM.getModule(), OMPRTL___kmpc_master), 2242 Args, 2243 OMPBuilder.getOrCreateRuntimeFunction( 2244 CGM.getModule(), OMPRTL___kmpc_end_master), 2245 Args, 2246 /*Conditional=*/true); 2247 MasterOpGen.setAction(Action); 2248 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2249 Action.Done(CGF); 2250 } 2251 2252 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2253 const RegionCodeGenTy &MaskedOpGen, 2254 SourceLocation Loc, const Expr *Filter) { 2255 if (!CGF.HaveInsertPoint()) 2256 return; 2257 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2258 // MaskedOpGen(); 2259 // __kmpc_end_masked(ident_t *, gtid); 2260 // } 2261 // Prepare arguments and build a call to __kmpc_masked 2262 llvm::Value *FilterVal = Filter 2263 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2264 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2265 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2266 FilterVal}; 2267 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2268 getThreadID(CGF, Loc)}; 2269 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2270 CGM.getModule(), OMPRTL___kmpc_masked), 2271 Args, 2272 OMPBuilder.getOrCreateRuntimeFunction( 2273 CGM.getModule(), OMPRTL___kmpc_end_masked), 2274 ArgsEnd, 2275 /*Conditional=*/true); 2276 MaskedOpGen.setAction(Action); 2277 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2278 Action.Done(CGF); 2279 } 2280 2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2282 SourceLocation Loc) { 2283 if (!CGF.HaveInsertPoint()) 2284 return; 2285 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2286 OMPBuilder.createTaskyield(CGF.Builder); 2287 } else { 2288 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2289 llvm::Value *Args[] = { 2290 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2291 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2292 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2293 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2294 Args); 2295 } 2296 2297 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2298 Region->emitUntiedSwitch(CGF); 2299 } 2300 2301 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2302 const RegionCodeGenTy &TaskgroupOpGen, 2303 SourceLocation Loc) { 2304 if (!CGF.HaveInsertPoint()) 2305 return; 2306 // __kmpc_taskgroup(ident_t *, gtid); 2307 // TaskgroupOpGen(); 2308 // __kmpc_end_taskgroup(ident_t *, gtid); 2309 // Prepare arguments and build a call to __kmpc_taskgroup 2310 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2311 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2312 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2313 Args, 2314
OMPBuilder.getOrCreateRuntimeFunction( 2315 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2316 Args); 2317 TaskgroupOpGen.setAction(Action); 2318 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2319 } 2320 2321 /// Given an array of pointers to variables, project the address of a 2322 /// given variable. 2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2324 unsigned Index, const VarDecl *Var) { 2325 // Pull out the pointer to the variable. 2326 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2327 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2328 2329 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2330 return Address( 2331 CGF.Builder.CreateBitCast( 2332 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2333 ElemTy, CGF.getContext().getDeclAlign(Var)); 2334 } 2335 2336 static llvm::Value *emitCopyprivateCopyFunction( 2337 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2338 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2339 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2340 SourceLocation Loc) { 2341 ASTContext &C = CGM.getContext(); 2342 // void copy_func(void *LHSArg, void *RHSArg); 2343 FunctionArgList Args; 2344 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2345 ImplicitParamDecl::Other); 2346 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2347 ImplicitParamDecl::Other); 2348 Args.push_back(&LHSArg); 2349 Args.push_back(&RHSArg); 2350 const auto &CGFI = 2351 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2352 std::string Name = 2353 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2354 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2355 llvm::GlobalValue::InternalLinkage, Name, 2356 &CGM.getModule()); 2357 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2358 Fn->setDoesNotRecurse(); 2359 CodeGenFunction CGF(CGM); 2360 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2361 // Dest = (void*[n])(LHSArg); 2362 // Src = (void*[n])(RHSArg); 2363 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2364 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2365 ArgsElemType->getPointerTo()), 2366 ArgsElemType, CGF.getPointerAlign()); 2367 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2368 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2369 ArgsElemType->getPointerTo()), 2370 ArgsElemType, CGF.getPointerAlign()); 2371 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2372 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2373 // ... 
2374 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2375 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2376 const auto *DestVar = 2377 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2378 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2379 2380 const auto *SrcVar = 2381 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2382 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2383 2384 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2385 QualType Type = VD->getType(); 2386 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2387 } 2388 CGF.FinishFunction(); 2389 return Fn; 2390 } 2391 2392 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2393 const RegionCodeGenTy &SingleOpGen, 2394 SourceLocation Loc, 2395 ArrayRef<const Expr *> CopyprivateVars, 2396 ArrayRef<const Expr *> SrcExprs, 2397 ArrayRef<const Expr *> DstExprs, 2398 ArrayRef<const Expr *> AssignmentOps) { 2399 if (!CGF.HaveInsertPoint()) 2400 return; 2401 assert(CopyprivateVars.size() == SrcExprs.size() && 2402 CopyprivateVars.size() == DstExprs.size() && 2403 CopyprivateVars.size() == AssignmentOps.size()); 2404 ASTContext &C = CGM.getContext(); 2405 // int32 did_it = 0; 2406 // if(__kmpc_single(ident_t *, gtid)) { 2407 // SingleOpGen(); 2408 // __kmpc_end_single(ident_t *, gtid); 2409 // did_it = 1; 2410 // } 2411 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2412 // <copy_func>, did_it); 2413 2414 Address DidIt = Address::invalid(); 2415 if (!CopyprivateVars.empty()) { 2416 // int32 did_it = 0; 2417 QualType KmpInt32Ty = 2418 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2419 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2420 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2421 } 2422 // Prepare arguments and build a call to __kmpc_single 2423 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2424 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2425 CGM.getModule(), OMPRTL___kmpc_single), 2426 Args, 2427 OMPBuilder.getOrCreateRuntimeFunction( 2428 CGM.getModule(), OMPRTL___kmpc_end_single), 2429 Args, 2430 /*Conditional=*/true); 2431 SingleOpGen.setAction(Action); 2432 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2433 if (DidIt.isValid()) { 2434 // did_it = 1; 2435 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2436 } 2437 Action.Done(CGF); 2438 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2439 // <copy_func>, did_it); 2440 if (DidIt.isValid()) { 2441 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2442 QualType CopyprivateArrayTy = C.getConstantArrayType( 2443 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2444 /*IndexTypeQuals=*/0); 2445 // Create a list of all private variables for copyprivate. 2446 Address CopyprivateList = 2447 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2448 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2449 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2450 CGF.Builder.CreateStore( 2451 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2452 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2453 CGF.VoidPtrTy), 2454 Elem); 2455 } 2456 // Build a function that copies private values from the single region to 2457 // all other threads in the corresponding parallel region.
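// Schematically (a sketch), the sequence emitted below is:
//   __kmpc_copyprivate(&loc, gtid, sizeof(void*[n]), &cpr_list,
//                      .omp.copyprivate.copy_func, did_it);
// where the copy function broadcasts the single thread's values to the team.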
2458 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2459 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2460 SrcExprs, DstExprs, AssignmentOps, Loc); 2461 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2462 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2463 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2464 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2465 llvm::Value *Args[] = { 2466 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2467 getThreadID(CGF, Loc), // i32 <gtid> 2468 BufSize, // size_t <buf_size> 2469 CL.getPointer(), // void *<copyprivate list> 2470 CpyFn, // void (*) (void *, void *) <copy_func> 2471 DidItVal // i32 did_it 2472 }; 2473 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2474 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2475 Args); 2476 } 2477 } 2478 2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2480 const RegionCodeGenTy &OrderedOpGen, 2481 SourceLocation Loc, bool IsThreads) { 2482 if (!CGF.HaveInsertPoint()) 2483 return; 2484 // __kmpc_ordered(ident_t *, gtid); 2485 // OrderedOpGen(); 2486 // __kmpc_end_ordered(ident_t *, gtid); 2487 // Prepare arguments and build a call to __kmpc_ordered 2488 if (IsThreads) { 2489 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2490 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2491 CGM.getModule(), OMPRTL___kmpc_ordered), 2492 Args, 2493 OMPBuilder.getOrCreateRuntimeFunction( 2494 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2495 Args); 2496 OrderedOpGen.setAction(Action); 2497 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2498 return; 2499 } 2500 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2501 } 2502 2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2504 unsigned Flags; 2505 if (Kind == OMPD_for) 2506 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2507 else if (Kind == OMPD_sections) 2508 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2509 else if (Kind == OMPD_single) 2510 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2511 else if (Kind == OMPD_barrier) 2512 Flags = OMP_IDENT_BARRIER_EXPL; 2513 else 2514 Flags = OMP_IDENT_BARRIER_IMPL; 2515 return Flags; 2516 } 2517 2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2519 CodeGenFunction &CGF, const OMPLoopDirective &S, 2520 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2521 // Check if the loop directive is actually a doacross loop directive. In this 2522 // case choose static, 1 schedule. 2523 if (llvm::any_of( 2524 S.getClausesOfKind<OMPOrderedClause>(), 2525 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2526 ScheduleKind = OMPC_SCHEDULE_static; 2527 // Chunk size is 1 in this case. 
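// E.g. (illustrative): a loop nest under
//   #pragma omp for ordered(2)
// is a doacross loop, so it is lowered as if schedule(static, 1) were given.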
2528 llvm::APInt ChunkSize(32, 1); 2529 ChunkExpr = IntegerLiteral::Create( 2530 CGF.getContext(), ChunkSize, 2531 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2532 SourceLocation()); 2533 } 2534 } 2535 2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2537 OpenMPDirectiveKind Kind, bool EmitChecks, 2538 bool ForceSimpleCall) { 2539 // Check if we should use the OMPBuilder. 2540 auto *OMPRegionInfo = 2541 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2542 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2543 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2544 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2545 return; 2546 } 2547 2548 if (!CGF.HaveInsertPoint()) 2549 return; 2550 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2553 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2554 // thread_id); 2555 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2556 getThreadID(CGF, Loc)}; 2557 if (OMPRegionInfo) { 2558 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2559 llvm::Value *Result = CGF.EmitRuntimeCall( 2560 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2561 OMPRTL___kmpc_cancel_barrier), 2562 Args); 2563 if (EmitChecks) { 2564 // if (__kmpc_cancel_barrier()) { 2565 // exit from construct; 2566 // } 2567 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2568 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2569 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2570 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2571 CGF.EmitBlock(ExitBB); 2572 // exit from construct; 2573 CodeGenFunction::JumpDest CancelDestination = 2574 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2575 CGF.EmitBranchThroughCleanup(CancelDestination); 2576 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2577 } 2578 return; 2579 } 2580 } 2581 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2582 CGM.getModule(), OMPRTL___kmpc_barrier), 2583 Args); 2584 } 2585 2586 /// Map the OpenMP loop schedule to the runtime enumeration. 2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2588 bool Chunked, bool Ordered) { 2589 switch (ScheduleKind) { 2590 case OMPC_SCHEDULE_static: 2591 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2592 : (Ordered ? OMP_ord_static : OMP_sch_static); 2593 case OMPC_SCHEDULE_dynamic: 2594 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2595 case OMPC_SCHEDULE_guided: 2596 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2597 case OMPC_SCHEDULE_runtime: 2598 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2599 case OMPC_SCHEDULE_auto: 2600 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2601 case OMPC_SCHEDULE_unknown: 2602 assert(!Chunked && "chunk was specified but schedule kind not known"); 2603 return Ordered ? OMP_ord_static : OMP_sch_static; 2604 } 2605 llvm_unreachable("Unexpected runtime schedule"); 2606 } 2607 2608 /// Map the OpenMP distribute schedule to the runtime enumeration. 2609 static OpenMPSchedType 2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2611 // Only 'static' is allowed for dist_schedule. 2612 return Chunked ?
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2613 } 2614 2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2616 bool Chunked) const { 2617 OpenMPSchedType Schedule = 2618 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2619 return Schedule == OMP_sch_static; 2620 } 2621 2622 bool CGOpenMPRuntime::isStaticNonchunked( 2623 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2624 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2625 return Schedule == OMP_dist_sch_static; 2626 } 2627 2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2629 bool Chunked) const { 2630 OpenMPSchedType Schedule = 2631 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2632 return Schedule == OMP_sch_static_chunked; 2633 } 2634 2635 bool CGOpenMPRuntime::isStaticChunked( 2636 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2637 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2638 return Schedule == OMP_dist_sch_static_chunked; 2639 } 2640 2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2642 OpenMPSchedType Schedule = 2643 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2644 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2645 return Schedule != OMP_sch_static; 2646 } 2647 2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2649 OpenMPScheduleClauseModifier M1, 2650 OpenMPScheduleClauseModifier M2) { 2651 int Modifier = 0; 2652 switch (M1) { 2653 case OMPC_SCHEDULE_MODIFIER_monotonic: 2654 Modifier = OMP_sch_modifier_monotonic; 2655 break; 2656 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2657 Modifier = OMP_sch_modifier_nonmonotonic; 2658 break; 2659 case OMPC_SCHEDULE_MODIFIER_simd: 2660 if (Schedule == OMP_sch_static_chunked) 2661 Schedule = OMP_sch_static_balanced_chunked; 2662 break; 2663 case OMPC_SCHEDULE_MODIFIER_last: 2664 case OMPC_SCHEDULE_MODIFIER_unknown: 2665 break; 2666 } 2667 switch (M2) { 2668 case OMPC_SCHEDULE_MODIFIER_monotonic: 2669 Modifier = OMP_sch_modifier_monotonic; 2670 break; 2671 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2672 Modifier = OMP_sch_modifier_nonmonotonic; 2673 break; 2674 case OMPC_SCHEDULE_MODIFIER_simd: 2675 if (Schedule == OMP_sch_static_chunked) 2676 Schedule = OMP_sch_static_balanced_chunked; 2677 break; 2678 case OMPC_SCHEDULE_MODIFIER_last: 2679 case OMPC_SCHEDULE_MODIFIER_unknown: 2680 break; 2681 } 2682 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. 2683 // If the static schedule kind is specified or if the ordered clause is 2684 // specified, and if the nonmonotonic modifier is not specified, the effect is 2685 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2686 // modifier is specified, the effect is as if the nonmonotonic modifier is 2687 // specified.
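// E.g. (a sketch): with -fopenmp-version=50, plain schedule(dynamic) yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while
// schedule(static) keeps Modifier == 0, i.e. monotonic behavior.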
2688 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2689 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2690 Schedule == OMP_sch_static_balanced_chunked || 2691 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2692 Schedule == OMP_dist_sch_static_chunked || 2693 Schedule == OMP_dist_sch_static)) 2694 Modifier = OMP_sch_modifier_nonmonotonic; 2695 } 2696 return Schedule | Modifier; 2697 } 2698 2699 void CGOpenMPRuntime::emitForDispatchInit( 2700 CodeGenFunction &CGF, SourceLocation Loc, 2701 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2702 bool Ordered, const DispatchRTInput &DispatchValues) { 2703 if (!CGF.HaveInsertPoint()) 2704 return; 2705 OpenMPSchedType Schedule = getRuntimeSchedule( 2706 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2707 assert(Ordered || 2708 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2709 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2710 Schedule != OMP_sch_static_balanced_chunked)); 2711 // Call __kmpc_dispatch_init( 2712 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2713 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2714 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2715 2716 // If the Chunk was not specified in the clause - use default value 1. 2717 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2718 : CGF.Builder.getIntN(IVSize, 1); 2719 llvm::Value *Args[] = { 2720 emitUpdateLocation(CGF, Loc), 2721 getThreadID(CGF, Loc), 2722 CGF.Builder.getInt32(addMonoNonMonoModifier( 2723 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2724 DispatchValues.LB, // Lower 2725 DispatchValues.UB, // Upper 2726 CGF.Builder.getIntN(IVSize, 1), // Stride 2727 Chunk // Chunk 2728 }; 2729 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2730 } 2731 2732 static void emitForStaticInitCall( 2733 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2734 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2735 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2736 const CGOpenMPRuntime::StaticRTInput &Values) { 2737 if (!CGF.HaveInsertPoint()) 2738 return; 2739 2740 assert(!Values.Ordered); 2741 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2742 Schedule == OMP_sch_static_balanced_chunked || 2743 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2744 Schedule == OMP_dist_sch_static || 2745 Schedule == OMP_dist_sch_static_chunked); 2746 2747 // Call __kmpc_for_static_init( 2748 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2749 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2750 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2751 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2752 llvm::Value *Chunk = Values.Chunk; 2753 if (Chunk == nullptr) { 2754 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2755 Schedule == OMP_dist_sch_static) && 2756 "expected static non-chunked schedule"); 2757 // If the Chunk was not specified in the clause - use default value 1. 
2758 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2759 } else { 2760 assert((Schedule == OMP_sch_static_chunked || 2761 Schedule == OMP_sch_static_balanced_chunked || 2762 Schedule == OMP_ord_static_chunked || 2763 Schedule == OMP_dist_sch_static_chunked) && 2764 "expected static chunked schedule"); 2765 } 2766 llvm::Value *Args[] = { 2767 UpdateLocation, 2768 ThreadId, 2769 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2770 M2)), // Schedule type 2771 Values.IL.getPointer(), // &isLastIter 2772 Values.LB.getPointer(), // &LB 2773 Values.UB.getPointer(), // &UB 2774 Values.ST.getPointer(), // &Stride 2775 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2776 Chunk // Chunk 2777 }; 2778 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2779 } 2780 2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2782 SourceLocation Loc, 2783 OpenMPDirectiveKind DKind, 2784 const OpenMPScheduleTy &ScheduleKind, 2785 const StaticRTInput &Values) { 2786 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2787 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2788 assert(isOpenMPWorksharingDirective(DKind) && 2789 "Expected loop-based or sections-based directive."); 2790 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2791 isOpenMPLoopDirective(DKind) 2792 ? OMP_IDENT_WORK_LOOP 2793 : OMP_IDENT_WORK_SECTIONS); 2794 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2795 llvm::FunctionCallee StaticInitFunction = 2796 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2797 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2798 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2799 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2800 } 2801 2802 void CGOpenMPRuntime::emitDistributeStaticInit( 2803 CodeGenFunction &CGF, SourceLocation Loc, 2804 OpenMPDistScheduleClauseKind SchedKind, 2805 const CGOpenMPRuntime::StaticRTInput &Values) { 2806 OpenMPSchedType ScheduleNum = 2807 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2808 llvm::Value *UpdatedLocation = 2809 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2810 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2811 llvm::FunctionCallee StaticInitFunction; 2812 bool isGPUDistribute = 2813 CGM.getLangOpts().OpenMPIsDevice && 2814 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2815 StaticInitFunction = createForStaticInitFunction( 2816 Values.IVSize, Values.IVSigned, isGPUDistribute); 2817 2818 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2819 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2820 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2821 } 2822 2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2824 SourceLocation Loc, 2825 OpenMPDirectiveKind DKind) { 2826 if (!CGF.HaveInsertPoint()) 2827 return; 2828 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2829 llvm::Value *Args[] = { 2830 emitUpdateLocation(CGF, Loc, 2831 isOpenMPDistributeDirective(DKind) 2832 ? OMP_IDENT_WORK_DISTRIBUTE 2833 : isOpenMPLoopDirective(DKind) 2834 ? 
OMP_IDENT_WORK_LOOP 2835 : OMP_IDENT_WORK_SECTIONS), 2836 getThreadID(CGF, Loc)}; 2837 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2838 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2839 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2840 CGF.EmitRuntimeCall( 2841 OMPBuilder.getOrCreateRuntimeFunction( 2842 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2843 Args); 2844 else 2845 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2846 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2847 Args); 2848 } 2849 2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2851 SourceLocation Loc, 2852 unsigned IVSize, 2853 bool IVSigned) { 2854 if (!CGF.HaveInsertPoint()) 2855 return; 2856 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2857 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2858 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2859 } 2860 2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2862 SourceLocation Loc, unsigned IVSize, 2863 bool IVSigned, Address IL, 2864 Address LB, Address UB, 2865 Address ST) { 2866 // Call __kmpc_dispatch_next( 2867 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2868 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2869 // kmp_int[32|64] *p_stride); 2870 llvm::Value *Args[] = { 2871 emitUpdateLocation(CGF, Loc), 2872 getThreadID(CGF, Loc), 2873 IL.getPointer(), // &isLastIter 2874 LB.getPointer(), // &Lower 2875 UB.getPointer(), // &Upper 2876 ST.getPointer() // &Stride 2877 }; 2878 llvm::Value *Call = 2879 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2880 return CGF.EmitScalarConversion( 2881 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2882 CGF.getContext().BoolTy, Loc); 2883 } 2884 2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2886 llvm::Value *NumThreads, 2887 SourceLocation Loc) { 2888 if (!CGF.HaveInsertPoint()) 2889 return; 2890 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2891 llvm::Value *Args[] = { 2892 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2893 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2894 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2895 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2896 Args); 2897 } 2898 2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2900 ProcBindKind ProcBind, 2901 SourceLocation Loc) { 2902 if (!CGF.HaveInsertPoint()) 2903 return; 2904 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2905 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2906 llvm::Value *Args[] = { 2907 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2908 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2910 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2911 Args); 2912 } 2913 2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2915 SourceLocation Loc, llvm::AtomicOrdering AO) { 2916 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2917 OMPBuilder.createFlush(CGF.Builder); 2918 } else { 2919 if (!CGF.HaveInsertPoint()) 2920 return; 2921 // Build call void __kmpc_flush(ident_t *loc) 2922 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2923 CGM.getModule(), OMPRTL___kmpc_flush), 2924
emitUpdateLocation(CGF, Loc)); 2925 } 2926 } 2927 2928 namespace { 2929 /// Indexes of fields for type kmp_task_t. 2930 enum KmpTaskTFields { 2931 /// List of shared variables. 2932 KmpTaskTShareds, 2933 /// Task routine. 2934 KmpTaskTRoutine, 2935 /// Partition id for the untied tasks. 2936 KmpTaskTPartId, 2937 /// Function with call of destructors for private variables. 2938 Data1, 2939 /// Task priority. 2940 Data2, 2941 /// (Taskloops only) Lower bound. 2942 KmpTaskTLowerBound, 2943 /// (Taskloops only) Upper bound. 2944 KmpTaskTUpperBound, 2945 /// (Taskloops only) Stride. 2946 KmpTaskTStride, 2947 /// (Taskloops only) Is last iteration flag. 2948 KmpTaskTLastIter, 2949 /// (Taskloops only) Reduction data. 2950 KmpTaskTReductions, 2951 }; 2952 } // anonymous namespace 2953 2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2955 return OffloadEntriesTargetRegion.empty() && 2956 OffloadEntriesDeviceGlobalVar.empty(); 2957 } 2958 2959 /// Initialize target region entry. 2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2961 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2962 StringRef ParentName, unsigned LineNum, 2963 unsigned Order) { 2964 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2965 "only required for the device " 2966 "code generation."); 2967 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2968 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2969 OMPTargetRegionEntryTargetRegion); 2970 ++OffloadingEntriesNum; 2971 } 2972 2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2974 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2975 StringRef ParentName, unsigned LineNum, 2976 llvm::Constant *Addr, llvm::Constant *ID, 2977 OMPTargetRegionEntryKind Flags) { 2978 // If we are emitting code for a target, the entry is already initialized, 2979 // only has to be registered. 2980 if (CGM.getLangOpts().OpenMPIsDevice) { 2981 // This could happen if the device compilation is invoked standalone. 
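// (For example, a hypothetical device-only invocation such as
// `clang -cc1 -fopenmp -fopenmp-is-device t.c` run without the corresponding
// host IR file never initializes these entries from metadata.)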
2982 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2983 return; 2984 auto &Entry = 2985 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2986 Entry.setAddress(Addr); 2987 Entry.setID(ID); 2988 Entry.setFlags(Flags); 2989 } else { 2990 if (Flags == 2991 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2992 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2993 /*IgnoreAddressId*/ true)) 2994 return; 2995 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2996 "Target region entry already registered!"); 2997 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2998 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2999 ++OffloadingEntriesNum; 3000 } 3001 } 3002 3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3004 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3005 bool IgnoreAddressId) const { 3006 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3007 if (PerDevice == OffloadEntriesTargetRegion.end()) 3008 return false; 3009 auto PerFile = PerDevice->second.find(FileID); 3010 if (PerFile == PerDevice->second.end()) 3011 return false; 3012 auto PerParentName = PerFile->second.find(ParentName); 3013 if (PerParentName == PerFile->second.end()) 3014 return false; 3015 auto PerLine = PerParentName->second.find(LineNum); 3016 if (PerLine == PerParentName->second.end()) 3017 return false; 3018 // Fail if this entry is already registered. 3019 if (!IgnoreAddressId && 3020 (PerLine->second.getAddress() || PerLine->second.getID())) 3021 return false; 3022 return true; 3023 } 3024 3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3026 const OffloadTargetRegionEntryInfoActTy &Action) { 3027 // Scan all target region entries and perform the provided action. 3028 for (const auto &D : OffloadEntriesTargetRegion) 3029 for (const auto &F : D.second) 3030 for (const auto &P : F.second) 3031 for (const auto &L : P.second) 3032 Action(D.first, F.first, P.first(), L.first, L.second); 3033 } 3034 3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3036 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3037 OMPTargetGlobalVarEntryKind Flags, 3038 unsigned Order) { 3039 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3040 "only required for the device " 3041 "code generation."); 3042 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3043 ++OffloadingEntriesNum; 3044 } 3045 3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3047 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3048 CharUnits VarSize, 3049 OMPTargetGlobalVarEntryKind Flags, 3050 llvm::GlobalValue::LinkageTypes Linkage) { 3051 if (CGM.getLangOpts().OpenMPIsDevice) { 3052 // This could happen if the device compilation is invoked standalone. 
3053 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3054 return; 3055 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3056 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3057 if (Entry.getVarSize().isZero()) { 3058 Entry.setVarSize(VarSize); 3059 Entry.setLinkage(Linkage); 3060 } 3061 return; 3062 } 3063 Entry.setVarSize(VarSize); 3064 Entry.setLinkage(Linkage); 3065 Entry.setAddress(Addr); 3066 } else { 3067 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3068 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3069 assert(Entry.isValid() && Entry.getFlags() == Flags && 3070 "Entry not initialized!"); 3071 if (Entry.getVarSize().isZero()) { 3072 Entry.setVarSize(VarSize); 3073 Entry.setLinkage(Linkage); 3074 } 3075 return; 3076 } 3077 OffloadEntriesDeviceGlobalVar.try_emplace( 3078 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3079 ++OffloadingEntriesNum; 3080 } 3081 } 3082 3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3084 actOnDeviceGlobalVarEntriesInfo( 3085 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3086 // Scan all device global variable entries and perform the provided action. 3087 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3088 Action(E.getKey(), E.getValue()); 3089 } 3090 3091 void CGOpenMPRuntime::createOffloadEntry( 3092 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3093 llvm::GlobalValue::LinkageTypes Linkage) { 3094 OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); 3095 } 3096 3097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3098 // Emit the offloading entries and metadata so that the device codegen side 3099 // can easily figure out what to emit. The produced metadata looks like 3100 // this: 3101 // 3102 // !omp_offload.info = !{!1, ...} 3103 // 3104 // Right now we only generate metadata for functions that contain target 3105 // regions. 3106 3107 // If we are in simd mode or there are no entries, we don't need to do 3108 // anything. 3109 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3110 return; 3111 3112 llvm::Module &M = CGM.getModule(); 3113 llvm::LLVMContext &C = M.getContext(); 3114 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3115 SourceLocation, StringRef>, 3116 16> 3117 OrderedEntries(OffloadEntriesInfoManager.size()); 3118 llvm::SmallVector<StringRef, 16> ParentFunctions( 3119 OffloadEntriesInfoManager.size()); 3120 3121 // Auxiliary methods to create metadata values and strings. 3122 auto &&GetMDInt = [this](unsigned V) { 3123 return llvm::ConstantAsMetadata::get( 3124 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3125 }; 3126 3127 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3128 3129 // Create the offloading info metadata node. 3130 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3131 3132 // Create a function that emits metadata for each target region entry. 3133 auto &&TargetRegionMetadataEmitter = 3134 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3135 &GetMDString]( 3136 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3137 unsigned Line, 3138 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3139 // Generate metadata for target regions. Each entry of this metadata 3140 // contains: 3141 // - Entry 0 -> Kind of this type of metadata (0). 3142 // - Entry 1 -> Device ID of the file where the entry was identified. 3143 // - Entry 2 -> File ID of the file where the entry was identified.
3144 // - Entry 3 -> Mangled name of the function where the entry was 3145 // identified. 3146 // - Entry 4 -> Line in the file where the entry was identified. 3147 // - Entry 5 -> Order the entry was created. 3148 // The first element of the metadata node is the kind. 3149 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3150 GetMDInt(FileID), GetMDString(ParentName), 3151 GetMDInt(Line), GetMDInt(E.getOrder())}; 3152 3153 SourceLocation Loc; 3154 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3155 E = CGM.getContext().getSourceManager().fileinfo_end(); 3156 I != E; ++I) { 3157 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3158 I->getFirst()->getUniqueID().getFile() == FileID) { 3159 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3160 I->getFirst(), Line, 1); 3161 break; 3162 } 3163 } 3164 // Save this entry in the right position of the ordered entries array. 3165 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3166 ParentFunctions[E.getOrder()] = ParentName; 3167 3168 // Add metadata to the named metadata node. 3169 MD->addOperand(llvm::MDNode::get(C, Ops)); 3170 }; 3171 3172 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3173 TargetRegionMetadataEmitter); 3174 3175 // Create a function that emits metadata for each device global variable entry. 3176 auto &&DeviceGlobalVarMetadataEmitter = 3177 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3178 MD](StringRef MangledName, 3179 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3180 &E) { 3181 // Generate metadata for global variables. Each entry of this metadata 3182 // contains: 3183 // - Entry 0 -> Kind of this type of metadata (1). 3184 // - Entry 1 -> Mangled name of the variable. 3185 // - Entry 2 -> Declare target kind. 3186 // - Entry 3 -> Order the entry was created. 3187 // The first element of the metadata node is the kind. 3188 llvm::Metadata *Ops[] = { 3189 GetMDInt(E.getKind()), GetMDString(MangledName), 3190 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3191 3192 // Save this entry in the right position of the ordered entries array. 3193 OrderedEntries[E.getOrder()] = 3194 std::make_tuple(&E, SourceLocation(), MangledName); 3195 3196 // Add metadata to the named metadata node. 3197 MD->addOperand(llvm::MDNode::get(C, Ops)); 3198 }; 3199 3200 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3201 DeviceGlobalVarMetadataEmitter); 3202 3203 for (const auto &E : OrderedEntries) { 3204 assert(std::get<0>(E) && "All ordered entries must exist!"); 3205 if (const auto *CE = 3206 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3207 std::get<0>(E))) { 3208 if (!CE->getID() || !CE->getAddress()) { 3209 // Do not blame the entry if the parent function is not emitted.
3210 StringRef FnName = ParentFunctions[CE->getOrder()]; 3211 if (!CGM.GetGlobalValue(FnName)) 3212 continue; 3213 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3214 DiagnosticsEngine::Error, 3215 "Offloading entry for target region in %0 is incorrect: either the " 3216 "address or the ID is invalid."); 3217 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3218 continue; 3219 } 3220 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3221 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3222 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3223 OffloadEntryInfoDeviceGlobalVar>( 3224 std::get<0>(E))) { 3225 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3226 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3227 CE->getFlags()); 3228 switch (Flags) { 3229 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3230 if (CGM.getLangOpts().OpenMPIsDevice && 3231 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3232 continue; 3233 if (!CE->getAddress()) { 3234 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3235 DiagnosticsEngine::Error, "Offloading entry for declare target " 3236 "variable %0 is incorrect: the " 3237 "address is invalid."); 3238 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3239 continue; 3240 } 3241 // The variable has no definition - no need to add the entry. 3242 if (CE->getVarSize().isZero()) 3243 continue; 3244 break; 3245 } 3246 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3247 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3248 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3249 "Declare target link address is set."); 3250 if (CGM.getLangOpts().OpenMPIsDevice) 3251 continue; 3252 if (!CE->getAddress()) { 3253 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3254 DiagnosticsEngine::Error, 3255 "Offloading entry for declare target variable is incorrect: the " 3256 "address is invalid."); 3257 CGM.getDiags().Report(DiagID); 3258 continue; 3259 } 3260 break; 3261 } 3262 3263 // Hidden or internal symbols on the device are not externally visible. We 3264 // should not attempt to register them by creating an offloading entry. 3265 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) 3266 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) 3267 continue; 3268 3269 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3270 CE->getVarSize().getQuantity(), Flags, 3271 CE->getLinkage()); 3272 } else { 3273 llvm_unreachable("Unsupported entry kind."); 3274 } 3275 } 3276 } 3277 3278 /// Loads all the offload entries information from the host IR 3279 /// metadata. 3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3281 // If we are in target mode, load the metadata from the host IR. This code has 3282 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
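// For reference, a sketch of the metadata shape consumed here (node names
// and operand values illustrative):
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"<ParentName>", i32 <Line>, i32 <Order>}
//   !1 = !{i32 1, !"<VarMangledName>", i32 <Flags>, i32 <Order>}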
3283 3284 if (!CGM.getLangOpts().OpenMPIsDevice) 3285 return; 3286 3287 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3288 return; 3289 3290 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3291 if (auto EC = Buf.getError()) { 3292 CGM.getDiags().Report(diag::err_cannot_open_file) 3293 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3294 return; 3295 } 3296 3297 llvm::LLVMContext C; 3298 auto ME = expectedToErrorOrAndEmitErrors( 3299 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3300 3301 if (auto EC = ME.getError()) { 3302 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3303 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3304 CGM.getDiags().Report(DiagID) 3305 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3306 return; 3307 } 3308 3309 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3310 if (!MD) 3311 return; 3312 3313 for (llvm::MDNode *MN : MD->operands()) { 3314 auto &&GetMDInt = [MN](unsigned Idx) { 3315 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3316 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3317 }; 3318 3319 auto &&GetMDString = [MN](unsigned Idx) { 3320 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3321 return V->getString(); 3322 }; 3323 3324 switch (GetMDInt(0)) { 3325 default: 3326 llvm_unreachable("Unexpected metadata!"); 3327 break; 3328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3329 OffloadingEntryInfoTargetRegion: 3330 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3331 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3332 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3333 /*Order=*/GetMDInt(5)); 3334 break; 3335 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3336 OffloadingEntryInfoDeviceGlobalVar: 3337 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3338 /*MangledName=*/GetMDString(1), 3339 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3340 /*Flags=*/GetMDInt(2)), 3341 /*Order=*/GetMDInt(3)); 3342 break; 3343 } 3344 } 3345 } 3346 3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3348 if (!KmpRoutineEntryPtrTy) { 3349 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
3350 ASTContext &C = CGM.getContext(); 3351 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3352 FunctionProtoType::ExtProtoInfo EPI; 3353 KmpRoutineEntryPtrQTy = C.getPointerType( 3354 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3355 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3356 } 3357 } 3358 3359 namespace { 3360 struct PrivateHelpersTy { 3361 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3362 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3363 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3364 PrivateElemInit(PrivateElemInit) {} 3365 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3366 const Expr *OriginalRef = nullptr; 3367 const VarDecl *Original = nullptr; 3368 const VarDecl *PrivateCopy = nullptr; 3369 const VarDecl *PrivateElemInit = nullptr; 3370 bool isLocalPrivate() const { 3371 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3372 } 3373 }; 3374 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3375 } // anonymous namespace 3376 3377 static bool isAllocatableDecl(const VarDecl *VD) { 3378 const VarDecl *CVD = VD->getCanonicalDecl(); 3379 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3380 return false; 3381 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3382 // Use the default allocation. 3383 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3384 !AA->getAllocator()); 3385 } 3386 3387 static RecordDecl * 3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3389 if (!Privates.empty()) { 3390 ASTContext &C = CGM.getContext(); 3391 // Build struct .kmp_privates_t. { 3392 // /* private vars */ 3393 // }; 3394 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3395 RD->startDefinition(); 3396 for (const auto &Pair : Privates) { 3397 const VarDecl *VD = Pair.second.Original; 3398 QualType Type = VD->getType().getNonReferenceType(); 3399 // If the private variable is a local variable with lvalue ref type, 3400 // allocate the pointer instead of the pointee type. 
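// (For example, a local private `int &R` is stored as an `int *` field, and
// an allocatable local adds one more level of indirection; example type is
// illustrative.)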
3401 if (Pair.second.isLocalPrivate()) { 3402 if (VD->getType()->isLValueReferenceType()) 3403 Type = C.getPointerType(Type); 3404 if (isAllocatableDecl(VD)) 3405 Type = C.getPointerType(Type); 3406 } 3407 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3408 if (VD->hasAttrs()) { 3409 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3410 E(VD->getAttrs().end()); 3411 I != E; ++I) 3412 FD->addAttr(*I); 3413 } 3414 } 3415 RD->completeDefinition(); 3416 return RD; 3417 } 3418 return nullptr; 3419 } 3420 3421 static RecordDecl * 3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3423 QualType KmpInt32Ty, 3424 QualType KmpRoutineEntryPointerQTy) { 3425 ASTContext &C = CGM.getContext(); 3426 // Build struct kmp_task_t { 3427 // void * shareds; 3428 // kmp_routine_entry_t routine; 3429 // kmp_int32 part_id; 3430 // kmp_cmplrdata_t data1; 3431 // kmp_cmplrdata_t data2; 3432 // For taskloops additional fields: 3433 // kmp_uint64 lb; 3434 // kmp_uint64 ub; 3435 // kmp_int64 st; 3436 // kmp_int32 liter; 3437 // void * reductions; 3438 // }; 3439 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3440 UD->startDefinition(); 3441 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3442 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3443 UD->completeDefinition(); 3444 QualType KmpCmplrdataTy = C.getRecordType(UD); 3445 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3446 RD->startDefinition(); 3447 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3448 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3449 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3451 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3452 if (isOpenMPTaskLoopDirective(Kind)) { 3453 QualType KmpUInt64Ty = 3454 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3455 QualType KmpInt64Ty = 3456 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3458 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3459 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3460 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3461 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3462 } 3463 RD->completeDefinition(); 3464 return RD; 3465 } 3466 3467 static RecordDecl * 3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3469 ArrayRef<PrivateDataTy> Privates) { 3470 ASTContext &C = CGM.getContext(); 3471 // Build struct kmp_task_t_with_privates { 3472 // kmp_task_t task_data; 3473 // .kmp_privates_t. privates; 3474 // }; 3475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3476 RD->startDefinition(); 3477 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3478 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3479 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3480 RD->completeDefinition(); 3481 return RD; 3482 } 3483 3484 /// Emit a proxy function which accepts kmp_task_t as the second 3485 /// argument. 
3486 /// \code 3487 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3488 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3489 /// For taskloops: 3490 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3491 /// tt->reductions, tt->shareds); 3492 /// return 0; 3493 /// } 3494 /// \endcode 3495 static llvm::Function * 3496 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3497 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3498 QualType KmpTaskTWithPrivatesPtrQTy, 3499 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3500 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3501 llvm::Value *TaskPrivatesMap) { 3502 ASTContext &C = CGM.getContext(); 3503 FunctionArgList Args; 3504 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3505 ImplicitParamDecl::Other); 3506 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3507 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3508 ImplicitParamDecl::Other); 3509 Args.push_back(&GtidArg); 3510 Args.push_back(&TaskTypeArg); 3511 const auto &TaskEntryFnInfo = 3512 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3513 llvm::FunctionType *TaskEntryTy = 3514 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3515 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3516 auto *TaskEntry = llvm::Function::Create( 3517 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3518 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3519 TaskEntry->setDoesNotRecurse(); 3520 CodeGenFunction CGF(CGM); 3521 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3522 Loc, Loc); 3523 3524 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3525 // tt, 3526 // For taskloops: 3527 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3528 // tt->task_data.shareds); 3529 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3530 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3531 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3532 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3533 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3534 const auto *KmpTaskTWithPrivatesQTyRD = 3535 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3536 LValue Base = 3537 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3538 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3539 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3540 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3541 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3542 3543 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3544 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3545 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3546 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3547 CGF.ConvertTypeForMem(SharedsPtrTy)); 3548 3549 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3550 llvm::Value *PrivatesParam; 3551 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3552 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3553 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3554 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3555 } else { 3556 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3557 } 3558 3559 llvm::Value *CommonArgs[] = { 3560 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3561 CGF.Builder 3562 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3563 CGF.VoidPtrTy, CGF.Int8Ty) 3564 .getPointer()}; 3565 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3566 std::end(CommonArgs)); 3567 if (isOpenMPTaskLoopDirective(Kind)) { 3568 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3569 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3570 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3571 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3572 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3573 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3574 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3575 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3576 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3577 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3578 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3579 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3580 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3581 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3582 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3583 CallArgs.push_back(LBParam); 3584 CallArgs.push_back(UBParam); 3585 CallArgs.push_back(StParam); 3586 CallArgs.push_back(LIParam); 3587 CallArgs.push_back(RParam); 3588 } 3589 CallArgs.push_back(SharedsParam); 3590 3591 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3592 CallArgs); 3593 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3594 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3595 CGF.FinishFunction(); 3596 return TaskEntry; 3597 } 3598 3599 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3600 SourceLocation Loc, 3601 QualType KmpInt32Ty, 3602 QualType KmpTaskTWithPrivatesPtrQTy, 3603 QualType KmpTaskTWithPrivatesQTy) { 3604 ASTContext &C = CGM.getContext(); 3605 FunctionArgList Args; 3606 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3607 ImplicitParamDecl::Other); 3608 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3609 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3610 ImplicitParamDecl::Other); 3611 Args.push_back(&GtidArg); 3612 Args.push_back(&TaskTypeArg); 3613 const auto &DestructorFnInfo = 3614 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3615 llvm::FunctionType *DestructorFnTy = 3616 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3617 std::string Name = 3618 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3619 auto *DestructorFn = 3620 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3621 Name, &CGM.getModule()); 3622 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3623 DestructorFnInfo); 3624 DestructorFn->setDoesNotRecurse(); 3625 CodeGenFunction CGF(CGM); 3626 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3627 Args, Loc, Loc); 3628 3629 LValue Base = CGF.EmitLoadOfPointerLValue( 3630 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3631 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3632 const auto *KmpTaskTWithPrivatesQTyRD = 3633 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3634 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3635 Base = CGF.EmitLValueForField(Base, *FI); 
3636 for (const auto *Field : 3637 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3638 if (QualType::DestructionKind DtorKind = 3639 Field->getType().isDestructedType()) { 3640 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3641 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3642 } 3643 } 3644 CGF.FinishFunction(); 3645 return DestructorFn; 3646 } 3647 3648 /// Emit a privates mapping function for correct handling of private and 3649 /// firstprivate variables. 3650 /// \code 3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3652 /// **noalias priv1,..., <tyn> **noalias privn) { 3653 /// *priv1 = &.privates.priv1; 3654 /// ...; 3655 /// *privn = &.privates.privn; 3656 /// } 3657 /// \endcode 3658 static llvm::Value * 3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3660 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3661 ArrayRef<PrivateDataTy> Privates) { 3662 ASTContext &C = CGM.getContext(); 3663 FunctionArgList Args; 3664 ImplicitParamDecl TaskPrivatesArg( 3665 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3666 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3667 ImplicitParamDecl::Other); 3668 Args.push_back(&TaskPrivatesArg); 3669 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3670 unsigned Counter = 1; 3671 for (const Expr *E : Data.PrivateVars) { 3672 Args.push_back(ImplicitParamDecl::Create( 3673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3674 C.getPointerType(C.getPointerType(E->getType())) 3675 .withConst() 3676 .withRestrict(), 3677 ImplicitParamDecl::Other)); 3678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3679 PrivateVarsPos[VD] = Counter; 3680 ++Counter; 3681 } 3682 for (const Expr *E : Data.FirstprivateVars) { 3683 Args.push_back(ImplicitParamDecl::Create( 3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3685 C.getPointerType(C.getPointerType(E->getType())) 3686 .withConst() 3687 .withRestrict(), 3688 ImplicitParamDecl::Other)); 3689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3690 PrivateVarsPos[VD] = Counter; 3691 ++Counter; 3692 } 3693 for (const Expr *E : Data.LastprivateVars) { 3694 Args.push_back(ImplicitParamDecl::Create( 3695 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3696 C.getPointerType(C.getPointerType(E->getType())) 3697 .withConst() 3698 .withRestrict(), 3699 ImplicitParamDecl::Other)); 3700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3701 PrivateVarsPos[VD] = Counter; 3702 ++Counter; 3703 } 3704 for (const VarDecl *VD : Data.PrivateLocals) { 3705 QualType Ty = VD->getType().getNonReferenceType(); 3706 if (VD->getType()->isLValueReferenceType()) 3707 Ty = C.getPointerType(Ty); 3708 if (isAllocatableDecl(VD)) 3709 Ty = C.getPointerType(Ty); 3710 Args.push_back(ImplicitParamDecl::Create( 3711 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3712 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3713 ImplicitParamDecl::Other)); 3714 PrivateVarsPos[VD] = Counter; 3715 ++Counter; 3716 } 3717 const auto &TaskPrivatesMapFnInfo = 3718 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3719 llvm::FunctionType *TaskPrivatesMapTy = 3720 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3721 std::string Name = 3722 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3723 auto *TaskPrivatesMap = llvm::Function::Create( 3724 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3725 &CGM.getModule()); 3726 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3727 TaskPrivatesMapFnInfo); 3728 if (CGM.getLangOpts().Optimize) { 3729 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3730 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3731 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3732 } 3733 CodeGenFunction CGF(CGM); 3734 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3735 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3736 3737 // *privi = &.privates.privi; 3738 LValue Base = CGF.EmitLoadOfPointerLValue( 3739 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3740 TaskPrivatesArg.getType()->castAs<PointerType>()); 3741 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3742 Counter = 0; 3743 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3744 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3745 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3746 LValue RefLVal = 3747 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3748 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3749 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3750 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3751 ++Counter; 3752 } 3753 CGF.FinishFunction(); 3754 return TaskPrivatesMap; 3755 } 3756 3757 /// Emit initialization for private variables in task-based directives. 3758 static void emitPrivatesInit(CodeGenFunction &CGF, 3759 const OMPExecutableDirective &D, 3760 Address KmpTaskSharedsPtr, LValue TDBase, 3761 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3762 QualType SharedsTy, QualType SharedsPtrTy, 3763 const OMPTaskDataTy &Data, 3764 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3765 ASTContext &C = CGF.getContext(); 3766 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3767 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3768 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3769 ? OMPD_taskloop 3770 : OMPD_task; 3771 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3772 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3773 LValue SrcBase; 3774 bool IsTargetTask = 3775 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3776 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3777 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3778 // PointersArray, SizesArray, and MappersArray. The original variables for 3779 // these arrays are not captured and we get their addresses explicitly. 3780 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3781 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3782 SrcBase = CGF.MakeAddrLValue( 3783 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3784 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3785 CGF.ConvertTypeForMem(SharedsTy)), 3786 SharedsTy); 3787 } 3788 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3789 for (const PrivateDataTy &Pair : Privates) { 3790 // Do not initialize private locals. 
3791 if (Pair.second.isLocalPrivate()) { 3792 ++FI; 3793 continue; 3794 } 3795 const VarDecl *VD = Pair.second.PrivateCopy; 3796 const Expr *Init = VD->getAnyInitializer(); 3797 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3798 !CGF.isTrivialInitializer(Init)))) { 3799 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3800 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3801 const VarDecl *OriginalVD = Pair.second.Original; 3802 // Check if the variable is the target-based BasePointersArray, 3803 // PointersArray, SizesArray, or MappersArray. 3804 LValue SharedRefLValue; 3805 QualType Type = PrivateLValue.getType(); 3806 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3807 if (IsTargetTask && !SharedField) { 3808 assert(isa<ImplicitParamDecl>(OriginalVD) && 3809 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3810 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3811 ->getNumParams() == 0 && 3812 isa<TranslationUnitDecl>( 3813 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3814 ->getDeclContext()) && 3815 "Expected artificial target data variable."); 3816 SharedRefLValue = 3817 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3818 } else if (ForDup) { 3819 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3820 SharedRefLValue = CGF.MakeAddrLValue( 3821 SharedRefLValue.getAddress(CGF).withAlignment( 3822 C.getDeclAlign(OriginalVD)), 3823 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3824 SharedRefLValue.getTBAAInfo()); 3825 } else if (CGF.LambdaCaptureFields.count( 3826 Pair.second.Original->getCanonicalDecl()) > 0 || 3827 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3828 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3829 } else { 3830 // Processing for implicitly captured variables. 3831 InlinedOpenMPRegionRAII Region( 3832 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3833 /*HasCancel=*/false, /*NoInheritance=*/true); 3834 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3835 } 3836 if (Type->isArrayType()) { 3837 // Initialize firstprivate array. 3838 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3839 // Perform simple memcpy. 3840 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3841 } else { 3842 // Initialize firstprivate array using element-by-element 3843 // initialization. 3844 CGF.EmitOMPAggregateAssign( 3845 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3846 Type, 3847 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3848 Address SrcElement) { 3849 // Clean up any temporaries needed by the initialization. 3850 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3851 InitScope.addPrivate(Elem, SrcElement); 3852 (void)InitScope.Privatize(); 3853 // Emit initialization for single element. 
3854 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3855 CGF, &CapturesInfo); 3856 CGF.EmitAnyExprToMem(Init, DestElement, 3857 Init->getType().getQualifiers(), 3858 /*IsInitializer=*/false); 3859 }); 3860 } 3861 } else { 3862 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3863 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3864 (void)InitScope.Privatize(); 3865 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3866 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3867 /*capturedByInit=*/false); 3868 } 3869 } else { 3870 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3871 } 3872 } 3873 ++FI; 3874 } 3875 } 3876 3877 /// Check if duplication function is required for taskloops. 3878 static bool checkInitIsRequired(CodeGenFunction &CGF, 3879 ArrayRef<PrivateDataTy> Privates) { 3880 bool InitRequired = false; 3881 for (const PrivateDataTy &Pair : Privates) { 3882 if (Pair.second.isLocalPrivate()) 3883 continue; 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)); 3888 if (InitRequired) 3889 break; 3890 } 3891 return InitRequired; 3892 } 3893 3894 3895 /// Emit task_dup function (for initialization of 3896 /// private/firstprivate/lastprivate vars and last_iter flag) 3897 /// \code 3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3899 /// lastpriv) { 3900 /// // setup lastprivate flag 3901 /// task_dst->last = lastpriv; 3902 /// // could be constructor calls here... 3903 /// } 3904 /// \endcode 3905 static llvm::Value * 3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3907 const OMPExecutableDirective &D, 3908 QualType KmpTaskTWithPrivatesPtrQTy, 3909 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3910 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3911 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3912 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3913 ASTContext &C = CGM.getContext(); 3914 FunctionArgList Args; 3915 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3916 KmpTaskTWithPrivatesPtrQTy, 3917 ImplicitParamDecl::Other); 3918 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3919 KmpTaskTWithPrivatesPtrQTy, 3920 ImplicitParamDecl::Other); 3921 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3922 ImplicitParamDecl::Other); 3923 Args.push_back(&DstArg); 3924 Args.push_back(&SrcArg); 3925 Args.push_back(&LastprivArg); 3926 const auto &TaskDupFnInfo = 3927 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3928 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3929 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3930 auto *TaskDup = llvm::Function::Create( 3931 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3932 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3933 TaskDup->setDoesNotRecurse(); 3934 CodeGenFunction CGF(CGM); 3935 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3936 Loc); 3937 3938 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3939 CGF.GetAddrOfLocalVar(&DstArg), 3940 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3941 // task_dst->liter = lastpriv; 3942 if (WithLastIter) { 3943 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3944 LValue Base = 
CGF.EmitLValueForField( 3945 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3946 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3947 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3948 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3949 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3950 } 3951 3952 // Emit initial values for private copies (if any). 3953 assert(!Privates.empty()); 3954 Address KmpTaskSharedsPtr = Address::invalid(); 3955 if (!Data.FirstprivateVars.empty()) { 3956 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3957 CGF.GetAddrOfLocalVar(&SrcArg), 3958 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3959 LValue Base = CGF.EmitLValueForField( 3960 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3961 KmpTaskSharedsPtr = Address( 3962 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3963 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3964 KmpTaskTShareds)), 3965 Loc), 3966 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3967 } 3968 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3969 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3970 CGF.FinishFunction(); 3971 return TaskDup; 3972 } 3973 3974 /// Checks if destructor function is required to be generated. 3975 /// \return true if cleanups are required, false otherwise. 3976 static bool 3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3978 ArrayRef<PrivateDataTy> Privates) { 3979 for (const PrivateDataTy &P : Privates) { 3980 if (P.second.isLocalPrivate()) 3981 continue; 3982 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3983 if (Ty.isDestructedType()) 3984 return true; 3985 } 3986 return false; 3987 } 3988 3989 namespace { 3990 /// Loop generator for OpenMP iterator expression. 
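/// For an expression such as `iterator(i = 0 : n)` (e.g. from a depend or
/// affinity modifier), this scope privatizes the iterator and its helper
/// counter, zero-initializes the counter, emits the `counter < upper` check
/// and the `i = begin + counter * step` update on entry, and emits the
/// counter increment plus the branch back to the check in the destructor
/// (a sketch of the emitted control flow; variable names illustrative).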
3991 class OMPIteratorGeneratorScope final 3992 : public CodeGenFunction::OMPPrivateScope { 3993 CodeGenFunction &CGF; 3994 const OMPIteratorExpr *E = nullptr; 3995 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3996 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3997 OMPIteratorGeneratorScope() = delete; 3998 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3999 4000 public: 4001 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4002 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4003 if (!E) 4004 return; 4005 SmallVector<llvm::Value *, 4> Uppers; 4006 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4007 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4008 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4009 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); 4010 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4011 addPrivate( 4012 HelperData.CounterVD, 4013 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); 4014 } 4015 Privatize(); 4016 4017 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4018 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4019 LValue CLVal = 4020 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4021 HelperData.CounterVD->getType()); 4022 // Counter = 0; 4023 CGF.EmitStoreOfScalar( 4024 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4025 CLVal); 4026 CodeGenFunction::JumpDest &ContDest = 4027 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4028 CodeGenFunction::JumpDest &ExitDest = 4029 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4030 // N = <number-of_iterations>; 4031 llvm::Value *N = Uppers[I]; 4032 // cont: 4033 // if (Counter < N) goto body; else goto exit; 4034 CGF.EmitBlock(ContDest.getBlock()); 4035 auto *CVal = 4036 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4037 llvm::Value *Cmp = 4038 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4039 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 4040 : CGF.Builder.CreateICmpULT(CVal, N); 4041 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4042 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4043 // body: 4044 CGF.EmitBlock(BodyBB); 4045 // Iteri = Begini + Counter * Stepi; 4046 CGF.EmitIgnoredExpr(HelperData.Update); 4047 } 4048 } 4049 ~OMPIteratorGeneratorScope() { 4050 if (!E) 4051 return; 4052 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4053 // Counter = Counter + 1; 4054 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4055 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4056 // goto cont; 4057 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4058 // exit: 4059 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4060 } 4061 } 4062 }; 4063 } // namespace 4064 4065 static std::pair<llvm::Value *, llvm::Value *> 4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4067 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4068 llvm::Value *Addr; 4069 if (OASE) { 4070 const Expr *Base = OASE->getBase(); 4071 Addr = CGF.EmitScalarExpr(Base); 4072 } else { 4073 Addr = CGF.EmitLValue(E).getPointer(CGF); 4074 } 4075 llvm::Value *SizeVal; 4076 QualType Ty = E->getType(); 4077 if (OASE) { 4078 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4079 for (const Expr *SE : OASE->getDimensions()) { 4080 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4081 Sz = CGF.EmitScalarConversion( 4082 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4083 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4084 } 4085 } else if (const auto *ASE = 4086 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4087 LValue UpAddrLVal = 4088 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4089 Address UpAddrAddress = UpAddrLVal.getAddress(CGF); 4090 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4091 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4092 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4093 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4094 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4095 } else { 4096 SizeVal = CGF.getTypeSize(Ty); 4097 } 4098 return std::make_pair(Addr, SizeVal); 4099 } 4100 4101 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4103 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4104 if (KmpTaskAffinityInfoTy.isNull()) { 4105 RecordDecl *KmpAffinityInfoRD = 4106 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4107 KmpAffinityInfoRD->startDefinition(); 4108 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4109 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4110 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4111 KmpAffinityInfoRD->completeDefinition(); 4112 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4113 } 4114 } 4115 4116 CGOpenMPRuntime::TaskResultTy 4117 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4118 const OMPExecutableDirective &D, 4119 llvm::Function *TaskFunction, QualType SharedsTy, 4120 Address Shareds, const OMPTaskDataTy &Data) { 4121 ASTContext &C = CGM.getContext(); 4122 llvm::SmallVector<PrivateDataTy, 4> Privates; 4123 // Aggregate privates and sort them by the alignment. 
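// (The sort below is stable and descending, so the most strictly aligned
// private copies come first in the generated .kmp_privates.t record.)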
4124 const auto *I = Data.PrivateCopies.begin(); 4125 for (const Expr *E : Data.PrivateVars) { 4126 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4127 Privates.emplace_back( 4128 C.getDeclAlign(VD), 4129 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4130 /*PrivateElemInit=*/nullptr)); 4131 ++I; 4132 } 4133 I = Data.FirstprivateCopies.begin(); 4134 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4135 for (const Expr *E : Data.FirstprivateVars) { 4136 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4137 Privates.emplace_back( 4138 C.getDeclAlign(VD), 4139 PrivateHelpersTy( 4140 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4141 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4142 ++I; 4143 ++IElemInitRef; 4144 } 4145 I = Data.LastprivateCopies.begin(); 4146 for (const Expr *E : Data.LastprivateVars) { 4147 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4148 Privates.emplace_back( 4149 C.getDeclAlign(VD), 4150 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4151 /*PrivateElemInit=*/nullptr)); 4152 ++I; 4153 } 4154 for (const VarDecl *VD : Data.PrivateLocals) { 4155 if (isAllocatableDecl(VD)) 4156 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4157 else 4158 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4159 } 4160 llvm::stable_sort(Privates, 4161 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4162 return L.first > R.first; 4163 }); 4164 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4165 // Build type kmp_routine_entry_t (if not built yet). 4166 emitKmpRoutineEntryT(KmpInt32Ty); 4167 // Build type kmp_task_t (if not built yet). 4168 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4169 if (SavedKmpTaskloopTQTy.isNull()) { 4170 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4171 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4172 } 4173 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4174 } else { 4175 assert((D.getDirectiveKind() == OMPD_task || 4176 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4177 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4178 "Expected taskloop, task or target directive"); 4179 if (SavedKmpTaskTQTy.isNull()) { 4180 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4181 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4182 } 4183 KmpTaskTQTy = SavedKmpTaskTQTy; 4184 } 4185 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4186 // Build particular struct kmp_task_t for the given task. 4187 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4188 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4189 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4190 QualType KmpTaskTWithPrivatesPtrQTy = 4191 C.getPointerType(KmpTaskTWithPrivatesQTy); 4192 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4193 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4194 KmpTaskTWithPrivatesTy->getPointerTo(); 4195 llvm::Value *KmpTaskTWithPrivatesTySize = 4196 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4197 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4198 4199 // Emit initial values for private copies (if any). 
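// Build the .omp_task_privates_map. helper first (or use a null pointer when
// there are no privates); the proxy task entry forwards it to TaskFunction.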
4200 llvm::Value *TaskPrivatesMap = nullptr; 4201 llvm::Type *TaskPrivatesMapTy = 4202 std::next(TaskFunction->arg_begin(), 3)->getType(); 4203 if (!Privates.empty()) { 4204 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4205 TaskPrivatesMap = 4206 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4207 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4208 TaskPrivatesMap, TaskPrivatesMapTy); 4209 } else { 4210 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4211 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4212 } 4213 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4214 // kmp_task_t *tt); 4215 llvm::Function *TaskEntry = emitProxyTaskFunction( 4216 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4217 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4218 TaskPrivatesMap); 4219 4220 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4221 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4222 // kmp_routine_entry_t *task_entry); 4223 // Task flags. Format is taken from 4224 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4225 // description of kmp_tasking_flags struct. 4226 enum { 4227 TiedFlag = 0x1, 4228 FinalFlag = 0x2, 4229 DestructorsFlag = 0x8, 4230 PriorityFlag = 0x20, 4231 DetachableFlag = 0x40, 4232 }; 4233 unsigned Flags = Data.Tied ? TiedFlag : 0; 4234 bool NeedsCleanup = false; 4235 if (!Privates.empty()) { 4236 NeedsCleanup = 4237 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4238 if (NeedsCleanup) 4239 Flags = Flags | DestructorsFlag; 4240 } 4241 if (Data.Priority.getInt()) 4242 Flags = Flags | PriorityFlag; 4243 if (D.hasClausesOfKind<OMPDetachClause>()) 4244 Flags = Flags | DetachableFlag; 4245 llvm::Value *TaskFlags = 4246 Data.Final.getPointer() 4247 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4248 CGF.Builder.getInt32(FinalFlag), 4249 CGF.Builder.getInt32(/*C=*/0)) 4250 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4251 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4252 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4253 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4254 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4255 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4256 TaskEntry, KmpRoutineEntryPtrTy)}; 4257 llvm::Value *NewTask; 4258 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4259 // Check if we have any device clause associated with the directive. 4260 const Expr *Device = nullptr; 4261 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4262 Device = C->getDevice(); 4263 // Emit device ID if any otherwise use default value. 4264 llvm::Value *DeviceID; 4265 if (Device) 4266 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4267 CGF.Int64Ty, /*isSigned=*/true); 4268 else 4269 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4270 AllocArgs.push_back(DeviceID); 4271 NewTask = CGF.EmitRuntimeCall( 4272 OMPBuilder.getOrCreateRuntimeFunction( 4273 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4274 AllocArgs); 4275 } else { 4276 NewTask = 4277 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4278 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4279 AllocArgs); 4280 } 4281 // Emit detach clause initialization. 
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with entries that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
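    // Sketch of the loop's effect: for 'affinity(x, a[0:n])' the entries are
    //   affs[0] = { (intptr_t)&x,    sizeof(x)        };
    //   affs[1] = { (intptr_t)&a[0], n * sizeof(a[0]) };
    // Only base_addr and len are written; the flags field is left untouched.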
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    // kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now until the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8,
  DepOmpAllMem = 0x80,
};
/// Field ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
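// For reference (a summary of the switch above, mirroring the runtime's
// kmp_depend_info flag values):
//   depend(in: x)               -> DepIn            (0x1)
//   depend(out: x) / inout      -> DepInOut         (0x3)
//   depend(mutexinoutset: x)    -> DepMutexInOutSet (0x4)
//   depend(inoutset: x)         -> DepInOutSet      (0x8)
//   depend(out: omp_all_memory) -> DepOmpAllMem     (0x80)
// The source/sink/depobj kinds are handled elsewhere and are not expected to
// reach this translation.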
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
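  // Each dependency handled by the loop below becomes one kmp_depend_info
  // entry; as a sketch:
  //   deps[i] = { (intptr_t)&item, sizeof(item), flags(DepKind) };
  // A null address and zero length stand in for 'omp_all_memory'.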
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
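  // The array is populated in three phases: plain dependencies first (loop
  // above), then iterator-expanded dependencies, then the contents of depobj
  // arrays (loops below), so for e.g.
  //   depend(in: a) depend(iterator(i=0:n), out: b[i]) depend(depobj: d)
  // the final layout is [ plain | iterator-expanded | depobj copies ].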
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
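  // Depobj storage sketch: emitDepobjDependClause() allocates N + 1
  // kmp_depend_info records, stores the element count in element 0's
  // base_addr field, and hands out a pointer to element 1. Hence
  // getDepobjElements() reads the count from deps[-1] relative to the handle,
  // and the loop below rewrites the flags of deps[0..N).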
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
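  // Illustrative overall shape of the emitted code: for
  //   #pragma omp task if(cond) depend(in: x)
  // the 'then' branch calls __kmpc_omp_task_with_deps(...), while the 'else'
  // branch runs the task serially:
  //   __kmpc_omp_task_begin_if0(...); .omp_task_entry.(gtid, new_task);
  //   __kmpc_omp_task_complete_if0(...);
  // preceded by __kmpc_omp_wait_deps(...) when dependences are present.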
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
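  // Illustrative arguments for the __kmpc_taskloop call built below: for
  //   #pragma omp taskloop grainsize(4)
  // sched is 1 (Grainsize) and the kmp_uint64 schedule operand is 4; nogroup
  // is always passed as 1 because any required taskgroup is emitted
  // separately by the compiler.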
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
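// As a sketch, the control flow emitted above corresponds to:
//   if (lhs != lhs + n) {
//     T *l = lhs, *r = rhs;
//     do {
//       RedOp(*l, *r); // RedOpGen over the privatized current elements
//       ++l; ++r;
//     } while (l != lhs + n);
//   }
// where T is the drilled-down element type and n the flattened element count.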
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);
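  // Return value convention consumed by the switch below (matching the
  // runtime's documentation for __kmpc_reduce): 1 selects the non-atomic
  // combine path, which must be closed with __kmpc_end_reduce{_nowait}; 2
  // selects the atomic fallback path; any other value (0) means this thread
  // does not take part in combining.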
  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
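// Note: for reduction items whose size is not a compile-time constant (e.g.
// a VLA section in a task reduction), the size is stashed in an artificial
// threadprivate variable named via generateUniqueName(CGM, "reduction_size",
// ...); emitReduceInitFunction() and emitReduceCombFunction() below reload it
// from there.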
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
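  // For example (a sketch): a VLA reduction item such as
  //   int n = ...; int arr[n];
  //   #pragma omp taskgroup task_reduction(+ : arr)
  // has no size known at compile time; the size computed at the construct is
  // stashed in an artificial threadprivate variable (see
  // emitTaskReductionFixups below) and reloaded here.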
if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
5867 // %lhs = bitcast void* %arg0 to <type>* 5868 // %rhs = bitcast void* %arg1 to <type>* 5869 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5870 PrivateScope.addPrivate( 5871 LHSVD, 5872 // Pull out the pointer to the variable. 5873 CGF.EmitLoadOfPointer( 5874 CGF.Builder.CreateElementBitCast( 5875 CGF.GetAddrOfLocalVar(&ParamInOut), 5876 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5877 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5878 PrivateScope.addPrivate( 5879 RHSVD, 5880 // Pull out the pointer to the variable. 5881 CGF.EmitLoadOfPointer( 5882 CGF.Builder.CreateElementBitCast( 5883 CGF.GetAddrOfLocalVar(&ParamIn), 5884 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5885 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5886 PrivateScope.Privatize(); 5887 // Emit the combiner body: 5888 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5889 // store <type> %2, <type>* %lhs 5890 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5891 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5892 cast<DeclRefExpr>(RHS)); 5893 CGF.FinishFunction(); 5894 return Fn; 5895 } 5896 5897 /// Emits reduction finalizer function: 5898 /// \code 5899 /// void @.red_fini(void* %arg) { 5900 /// %0 = bitcast void* %arg to <type>* 5901 /// <destroy>(<type>* %0) 5902 /// ret void 5903 /// } 5904 /// \endcode 5905 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5906 SourceLocation Loc, 5907 ReductionCodeGen &RCG, unsigned N) { 5908 if (!RCG.needCleanups(N)) 5909 return nullptr; 5910 ASTContext &C = CGM.getContext(); 5911 FunctionArgList Args; 5912 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5913 ImplicitParamDecl::Other); 5914 Args.emplace_back(&Param); 5915 const auto &FnInfo = 5916 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5917 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5918 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5919 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5920 Name, &CGM.getModule()); 5921 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5922 Fn->setDoesNotRecurse(); 5923 CodeGenFunction CGF(CGM); 5924 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5925 Address PrivateAddr = CGF.EmitLoadOfPointer( 5926 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5927 llvm::Value *Size = nullptr; 5928 // If the size of the reduction item is non-constant, load it from global 5929 // threadprivate variable. 
5930 if (RCG.getSizes(N).second) { 5931 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5932 CGF, CGM.getContext().getSizeType(), 5933 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5934 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5935 CGM.getContext().getSizeType(), Loc); 5936 } 5937 RCG.emitAggregateType(CGF, N, Size); 5938 // Emit the finalizer body: 5939 // <destroy>(<type>* %0) 5940 RCG.emitCleanups(CGF, N, PrivateAddr); 5941 CGF.FinishFunction(Loc); 5942 return Fn; 5943 } 5944 5945 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5946 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5947 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5948 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5949 return nullptr; 5950 5951 // Build typedef struct: 5952 // kmp_taskred_input { 5953 // void *reduce_shar; // shared reduction item 5954 // void *reduce_orig; // original reduction item used for initialization 5955 // size_t reduce_size; // size of data item 5956 // void *reduce_init; // data initialization routine 5957 // void *reduce_fini; // data finalization routine 5958 // void *reduce_comb; // data combiner routine 5959 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5960 // } kmp_taskred_input_t; 5961 ASTContext &C = CGM.getContext(); 5962 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5963 RD->startDefinition(); 5964 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5965 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5966 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5967 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5968 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5969 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5970 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5971 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5972 RD->completeDefinition(); 5973 QualType RDType = C.getRecordType(RD); 5974 unsigned Size = Data.ReductionVars.size(); 5975 llvm::APInt ArraySize(/*numBits=*/64, Size); 5976 QualType ArrayRDType = C.getConstantArrayType( 5977 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5978 // kmp_task_red_input_t .rd_input.[Size]; 5979 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5980 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5981 Data.ReductionCopies, Data.ReductionOps); 5982 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5983 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5984 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5985 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5986 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5987 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5988 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5989 ".rd_input.gep."); 5990 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5991 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5992 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5993 RCG.emitSharedOrigLValue(CGF, Cnt); 5994 llvm::Value *CastedShared = 5995 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 5996 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5997 // ElemLVal.reduce_orig = &Origs[Cnt]; 5998 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5999 llvm::Value *CastedOrig = 6000 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6001 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6002 RCG.emitAggregateType(CGF, Cnt); 6003 llvm::Value *SizeValInChars; 6004 llvm::Value *SizeVal; 6005 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6006 // We use delayed creation/initialization for VLAs and array sections. It is 6007 // required because runtime does not provide the way to pass the sizes of 6008 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6009 // threadprivate global variables are used to store these values and use 6010 // them in the functions. 6011 bool DelayedCreation = !!SizeVal; 6012 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6013 /*isSigned=*/false); 6014 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6015 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6016 // ElemLVal.reduce_init = init; 6017 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6018 llvm::Value *InitAddr = 6019 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6020 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6021 // ElemLVal.reduce_fini = fini; 6022 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6023 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6024 llvm::Value *FiniAddr = Fini 6025 ? CGF.EmitCastToVoidPtr(Fini) 6026 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6027 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6028 // ElemLVal.reduce_comb = comb; 6029 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6030 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6031 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6032 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6033 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6034 // ElemLVal.flags = 0; 6035 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6036 if (DelayedCreation) { 6037 CGF.EmitStoreOfScalar( 6038 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6039 FlagsLVal); 6040 } else 6041 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6042 FlagsLVal.getType()); 6043 } 6044 if (Data.IsReductionWithTaskMod) { 6045 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6046 // is_ws, int num, void *data); 6047 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6048 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6049 CGM.IntTy, /*isSigned=*/true); 6050 llvm::Value *Args[] = { 6051 IdentTLoc, GTid, 6052 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size is non-constant
  // (i.e., Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
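    // In this dependence-free case the OMPIRBuilder path below emits the
    // plain runtime call; roughly (a sketch of the expected IR):
    //   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
    //   call i32 @__kmpc_omp_taskwait(%struct.ident_t* @loc, i32 %gtid)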
6136 OMPBuilder.createTaskwait(CGF.Builder); 6137 } else { 6138 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6139 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6140 auto &M = CGM.getModule(); 6141 Address DependenciesArray = Address::invalid(); 6142 llvm::Value *NumOfElements; 6143 std::tie(NumOfElements, DependenciesArray) = 6144 emitDependClause(CGF, Data.Dependences, Loc); 6145 llvm::Value *DepWaitTaskArgs[6]; 6146 if (!Data.Dependences.empty()) { 6147 DepWaitTaskArgs[0] = UpLoc; 6148 DepWaitTaskArgs[1] = ThreadID; 6149 DepWaitTaskArgs[2] = NumOfElements; 6150 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6151 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6152 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6153 6154 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6155 6156 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6157 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6158 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6159 // is specified. 6160 CGF.EmitRuntimeCall( 6161 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6162 DepWaitTaskArgs); 6163 6164 } else { 6165 6166 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6167 // global_tid); 6168 llvm::Value *Args[] = {UpLoc, ThreadID}; 6169 // Ignore return result until untied tasks are supported. 6170 CGF.EmitRuntimeCall( 6171 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6172 Args); 6173 } 6174 } 6175 6176 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6177 Region->emitUntiedSwitch(CGF); 6178 } 6179 6180 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6181 OpenMPDirectiveKind InnerKind, 6182 const RegionCodeGenTy &CodeGen, 6183 bool HasCancel) { 6184 if (!CGF.HaveInsertPoint()) 6185 return; 6186 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6187 InnerKind != OMPD_critical && 6188 InnerKind != OMPD_master && 6189 InnerKind != OMPD_masked); 6190 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6191 } 6192 6193 namespace { 6194 enum RTCancelKind { 6195 CancelNoreq = 0, 6196 CancelParallel = 1, 6197 CancelLoop = 2, 6198 CancelSections = 3, 6199 CancelTaskgroup = 4 6200 }; 6201 } // anonymous namespace 6202 6203 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6204 RTCancelKind CancelKind = CancelNoreq; 6205 if (CancelRegion == OMPD_parallel) 6206 CancelKind = CancelParallel; 6207 else if (CancelRegion == OMPD_for) 6208 CancelKind = CancelLoop; 6209 else if (CancelRegion == OMPD_sections) 6210 CancelKind = CancelSections; 6211 else { 6212 assert(CancelRegion == OMPD_taskgroup); 6213 CancelKind = CancelTaskgroup; 6214 } 6215 return CancelKind; 6216 } 6217 6218 void CGOpenMPRuntime::emitCancellationPointCall( 6219 CodeGenFunction &CGF, SourceLocation Loc, 6220 OpenMPDirectiveKind CancelRegion) { 6221 if (!CGF.HaveInsertPoint()) 6222 return; 6223 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6224 // global_tid, kmp_int32 cncl_kind); 6225 if (auto *OMPRegionInfo = 6226 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6227 // For 'cancellation point taskgroup', the task region info may not have a 6228 // cancel. This may instead happen in another adjacent task. 
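    // E.g. (a sketch):
    //   #pragma omp task   // this task holds the cancellation point...
    //   { ... #pragma omp cancellation point taskgroup ... }
    //   #pragma omp task   // ...while a sibling task performs the cancel.
    //   { ... #pragma omp cancel taskgroup ... }
    // Hence, for the taskgroup kind, the runtime check must be emitted even
    // when the enclosing region info records no cancel.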
6229 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6230 llvm::Value *Args[] = { 6231 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6232 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6233 // Ignore return result until untied tasks are supported. 6234 llvm::Value *Result = CGF.EmitRuntimeCall( 6235 OMPBuilder.getOrCreateRuntimeFunction( 6236 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6237 Args); 6238 // if (__kmpc_cancellationpoint()) { 6239 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6240 // exit from construct; 6241 // } 6242 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6243 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6244 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6245 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6246 CGF.EmitBlock(ExitBB); 6247 if (CancelRegion == OMPD_parallel) 6248 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6249 // exit from construct; 6250 CodeGenFunction::JumpDest CancelDest = 6251 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6252 CGF.EmitBranchThroughCleanup(CancelDest); 6253 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6254 } 6255 } 6256 } 6257 6258 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6259 const Expr *IfCond, 6260 OpenMPDirectiveKind CancelRegion) { 6261 if (!CGF.HaveInsertPoint()) 6262 return; 6263 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6264 // kmp_int32 cncl_kind); 6265 auto &M = CGM.getModule(); 6266 if (auto *OMPRegionInfo = 6267 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6268 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6269 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6270 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6271 llvm::Value *Args[] = { 6272 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6273 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6274 // Ignore return result until untied tasks are supported. 6275 llvm::Value *Result = CGF.EmitRuntimeCall( 6276 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6277 // if (__kmpc_cancel()) { 6278 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6279 // exit from construct; 6280 // } 6281 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6282 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6283 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6284 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6285 CGF.EmitBlock(ExitBB); 6286 if (CancelRegion == OMPD_parallel) 6287 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6288 // exit from construct; 6289 CodeGenFunction::JumpDest CancelDest = 6290 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6291 CGF.EmitBranchThroughCleanup(CancelDest); 6292 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6293 }; 6294 if (IfCond) { 6295 emitIfClause(CGF, IfCond, ThenGen, 6296 [](CodeGenFunction &, PrePostActionTy &) {}); 6297 } else { 6298 RegionCodeGenTy ThenRCG(ThenGen); 6299 ThenRCG(CGF); 6300 } 6301 } 6302 } 6303 6304 namespace { 6305 /// Cleanup action for uses_allocators support. 
6306 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6307 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6308 6309 public: 6310 OMPUsesAllocatorsActionTy( 6311 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6312 : Allocators(Allocators) {} 6313 void Enter(CodeGenFunction &CGF) override { 6314 if (!CGF.HaveInsertPoint()) 6315 return; 6316 for (const auto &AllocatorData : Allocators) { 6317 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6318 CGF, AllocatorData.first, AllocatorData.second); 6319 } 6320 } 6321 void Exit(CodeGenFunction &CGF) override { 6322 if (!CGF.HaveInsertPoint()) 6323 return; 6324 for (const auto &AllocatorData : Allocators) { 6325 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6326 AllocatorData.first); 6327 } 6328 } 6329 }; 6330 } // namespace 6331 6332 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6333 const OMPExecutableDirective &D, StringRef ParentName, 6334 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6335 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6336 assert(!ParentName.empty() && "Invalid target region parent name!"); 6337 HasEmittedTargetRegion = true; 6338 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6339 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6340 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6341 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6342 if (!D.AllocatorTraits) 6343 continue; 6344 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6345 } 6346 } 6347 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6348 CodeGen.setAction(UsesAllocatorAction); 6349 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6350 IsOffloadEntry, CodeGen); 6351 } 6352 6353 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6354 const Expr *Allocator, 6355 const Expr *AllocatorTraits) { 6356 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6357 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6358 // Use default memspace handle. 6359 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6360 llvm::Value *NumTraits = llvm::ConstantInt::get( 6361 CGF.IntTy, cast<ConstantArrayType>( 6362 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6363 ->getSize() 6364 .getLimitedValue()); 6365 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6366 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6367 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6368 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6369 AllocatorTraitsLVal.getBaseInfo(), 6370 AllocatorTraitsLVal.getTBAAInfo()); 6371 llvm::Value *Traits = 6372 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6373 6374 llvm::Value *AllocatorVal = 6375 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6376 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6377 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6378 // Store to allocator. 
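  // For example (a sketch; 'my_alloc' and 'traits' are illustrative names):
  //   omp_alloctrait_t traits[] = {{omp_atk_alignment, 64}};
  //   omp_allocator_handle_t my_alloc;
  //   #pragma omp target uses_allocators(my_alloc(traits))
  // emits the __kmpc_init_allocator call built above and then stores the
  // returned handle into the privatized 'my_alloc' variable below.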
CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that, so the compiler
  // does not need to keep it alive and can inline the host function if that
  // proves worthwhile during optimization. On the other hand, if emitting
  // code for the device, the ID has to be the function address so that it can
  // be retrieved from the offloading entry and launched by the runtime
  // library.
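  // For illustration (IDs and names made up): on the host the region ID is
  // just a named dummy global, e.g.
  //   @.__omp_offloading_10302_2fbe1d_foo_l42.region_id = weak constant i8 0
  // whereas for the device the address of the outlined kernel itself serves
  // as the ID.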
We also mark the 6453 // outlined function to have external linkage in case we are emitting code for 6454 // the device, because these functions will be entry points to the device. 6455 6456 if (CGM.getLangOpts().OpenMPIsDevice) { 6457 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6458 OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage); 6459 OutlinedFn->setDSOLocal(false); 6460 if (CGM.getTriple().isAMDGCN()) 6461 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6462 } else { 6463 std::string Name = getName({EntryFnName, "region_id"}); 6464 OutlinedFnID = new llvm::GlobalVariable( 6465 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6466 llvm::GlobalValue::WeakAnyLinkage, 6467 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6468 } 6469 6470 // If we do not allow host fallback we still need a named address to use. 6471 llvm::Constant *TargetRegionEntryAddr = OutlinedFn; 6472 if (!BuildOutlinedFn) { 6473 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && 6474 "Named kernel already exists?"); 6475 TargetRegionEntryAddr = new llvm::GlobalVariable( 6476 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6477 llvm::GlobalValue::InternalLinkage, 6478 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); 6479 } 6480 6481 // Register the information for the entry associated with this target region. 6482 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6483 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6484 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6485 6486 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6487 int32_t DefaultValTeams = -1; 6488 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6489 if (DefaultValTeams > 0 && OutlinedFn) { 6490 OutlinedFn->addFnAttr("omp_target_num_teams", 6491 std::to_string(DefaultValTeams)); 6492 } 6493 int32_t DefaultValThreads = -1; 6494 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6495 if (DefaultValThreads > 0 && OutlinedFn) { 6496 OutlinedFn->addFnAttr("omp_target_thread_limit", 6497 std::to_string(DefaultValThreads)); 6498 } 6499 6500 if (BuildOutlinedFn) 6501 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6502 } 6503 6504 /// Checks if the expression is constant or does not have non-trivial function 6505 /// calls. 6506 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6507 // We can skip constant expressions. 6508 // We can skip expressions with trivial calls or simple expressions. 6509 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6510 !E->hasNonTrivialCall(Ctx)) && 6511 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6512 } 6513 6514 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6515 const Stmt *Body) { 6516 const Stmt *Child = Body->IgnoreContainers(); 6517 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6518 Child = nullptr; 6519 for (const Stmt *S : C->body()) { 6520 if (const auto *E = dyn_cast<Expr>(S)) { 6521 if (isTrivial(Ctx, E)) 6522 continue; 6523 } 6524 // Some of the statements can be ignored. 6525 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6526 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6527 continue; 6528 // Analyze declarations. 
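      // E.g. (a sketch): in a captured body like
      //   { int Unused; #pragma omp parallel for ... }
      // the never-referenced local does not prevent the directive from being
      // found as the single child.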
6529 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6530 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6531 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6532 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6533 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6534 isa<UsingDirectiveDecl>(D) || 6535 isa<OMPDeclareReductionDecl>(D) || 6536 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6537 return true; 6538 const auto *VD = dyn_cast<VarDecl>(D); 6539 if (!VD) 6540 return false; 6541 return VD->hasGlobalStorage() || !VD->isUsed(); 6542 })) 6543 continue; 6544 } 6545 // Found multiple children - cannot get the one child only. 6546 if (Child) 6547 return nullptr; 6548 Child = S; 6549 } 6550 if (Child) 6551 Child = Child->IgnoreContainers(); 6552 } 6553 return Child; 6554 } 6555 6556 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6557 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6558 int32_t &DefaultVal) { 6559 6560 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6561 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6562 "Expected target-based executable directive."); 6563 switch (DirectiveKind) { 6564 case OMPD_target: { 6565 const auto *CS = D.getInnermostCapturedStmt(); 6566 const auto *Body = 6567 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6568 const Stmt *ChildStmt = 6569 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6570 if (const auto *NestedDir = 6571 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6572 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6573 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6574 const Expr *NumTeams = 6575 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6576 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6577 if (auto Constant = 6578 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6579 DefaultVal = Constant->getExtValue(); 6580 return NumTeams; 6581 } 6582 DefaultVal = 0; 6583 return nullptr; 6584 } 6585 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6586 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6587 DefaultVal = 1; 6588 return nullptr; 6589 } 6590 DefaultVal = 1; 6591 return nullptr; 6592 } 6593 // A value of -1 is used to check if we need to emit no teams region 6594 DefaultVal = -1; 6595 return nullptr; 6596 } 6597 case OMPD_target_teams: 6598 case OMPD_target_teams_distribute: 6599 case OMPD_target_teams_distribute_simd: 6600 case OMPD_target_teams_distribute_parallel_for: 6601 case OMPD_target_teams_distribute_parallel_for_simd: { 6602 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6603 const Expr *NumTeams = 6604 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6605 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6606 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6607 DefaultVal = Constant->getExtValue(); 6608 return NumTeams; 6609 } 6610 DefaultVal = 0; 6611 return nullptr; 6612 } 6613 case OMPD_target_parallel: 6614 case OMPD_target_parallel_for: 6615 case OMPD_target_parallel_for_simd: 6616 case OMPD_target_simd: 6617 DefaultVal = 1; 6618 return nullptr; 6619 case OMPD_parallel: 6620 case OMPD_for: 6621 case OMPD_parallel_for: 6622 case OMPD_parallel_master: 6623 case OMPD_parallel_sections: 6624 case OMPD_for_simd: 6625 case OMPD_parallel_for_simd: 6626 case OMPD_cancel: 6627 case OMPD_cancellation_point: 6628 case OMPD_ordered: 6629 case OMPD_threadprivate: 6630 case OMPD_allocate: 6631 case OMPD_task: 6632 case 
OMPD_simd: 6633 case OMPD_tile: 6634 case OMPD_unroll: 6635 case OMPD_sections: 6636 case OMPD_section: 6637 case OMPD_single: 6638 case OMPD_master: 6639 case OMPD_critical: 6640 case OMPD_taskyield: 6641 case OMPD_barrier: 6642 case OMPD_taskwait: 6643 case OMPD_taskgroup: 6644 case OMPD_atomic: 6645 case OMPD_flush: 6646 case OMPD_depobj: 6647 case OMPD_scan: 6648 case OMPD_teams: 6649 case OMPD_target_data: 6650 case OMPD_target_exit_data: 6651 case OMPD_target_enter_data: 6652 case OMPD_distribute: 6653 case OMPD_distribute_simd: 6654 case OMPD_distribute_parallel_for: 6655 case OMPD_distribute_parallel_for_simd: 6656 case OMPD_teams_distribute: 6657 case OMPD_teams_distribute_simd: 6658 case OMPD_teams_distribute_parallel_for: 6659 case OMPD_teams_distribute_parallel_for_simd: 6660 case OMPD_target_update: 6661 case OMPD_declare_simd: 6662 case OMPD_declare_variant: 6663 case OMPD_begin_declare_variant: 6664 case OMPD_end_declare_variant: 6665 case OMPD_declare_target: 6666 case OMPD_end_declare_target: 6667 case OMPD_declare_reduction: 6668 case OMPD_declare_mapper: 6669 case OMPD_taskloop: 6670 case OMPD_taskloop_simd: 6671 case OMPD_master_taskloop: 6672 case OMPD_master_taskloop_simd: 6673 case OMPD_parallel_master_taskloop: 6674 case OMPD_parallel_master_taskloop_simd: 6675 case OMPD_requires: 6676 case OMPD_metadirective: 6677 case OMPD_unknown: 6678 break; 6679 default: 6680 break; 6681 } 6682 llvm_unreachable("Unexpected directive kind."); 6683 } 6684 6685 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6686 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6687 assert(!CGF.getLangOpts().OpenMPIsDevice && 6688 "Clauses associated with the teams directive expected to be emitted " 6689 "only for the host!"); 6690 CGBuilderTy &Bld = CGF.Builder; 6691 int32_t DefaultNT = -1; 6692 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6693 if (NumTeams != nullptr) { 6694 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6695 6696 switch (DirectiveKind) { 6697 case OMPD_target: { 6698 const auto *CS = D.getInnermostCapturedStmt(); 6699 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6700 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6701 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6702 /*IgnoreResultAssign*/ true); 6703 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6704 /*isSigned=*/true); 6705 } 6706 case OMPD_target_teams: 6707 case OMPD_target_teams_distribute: 6708 case OMPD_target_teams_distribute_simd: 6709 case OMPD_target_teams_distribute_parallel_for: 6710 case OMPD_target_teams_distribute_parallel_for_simd: { 6711 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6712 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6713 /*IgnoreResultAssign*/ true); 6714 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6715 /*isSigned=*/true); 6716 } 6717 default: 6718 break; 6719 } 6720 } else if (DefaultNT == -1) { 6721 return nullptr; 6722 } 6723 6724 return Bld.getInt32(DefaultNT); 6725 } 6726 6727 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6728 llvm::Value *DefaultThreadLimitVal) { 6729 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6730 CGF.getContext(), CS->getCapturedStmt()); 6731 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6732 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6733 llvm::Value *NumThreads = nullptr; 6734 llvm::Value *CondVal = nullptr; 6735 // Handle if clause. 
If an if clause is present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause iff the if clause was not
      // specified or does not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process the condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
DefaultThreadLimitVal 6816 : CGF.Builder.getInt32(0); 6817 } 6818 6819 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6820 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6821 int32_t &DefaultVal) { 6822 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6823 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6824 "Expected target-based executable directive."); 6825 6826 switch (DirectiveKind) { 6827 case OMPD_target: 6828 // Teams have no clause thread_limit 6829 return nullptr; 6830 case OMPD_target_teams: 6831 case OMPD_target_teams_distribute: 6832 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6833 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6834 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6835 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6836 if (auto Constant = 6837 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6838 DefaultVal = Constant->getExtValue(); 6839 return ThreadLimit; 6840 } 6841 return nullptr; 6842 case OMPD_target_parallel: 6843 case OMPD_target_parallel_for: 6844 case OMPD_target_parallel_for_simd: 6845 case OMPD_target_teams_distribute_parallel_for: 6846 case OMPD_target_teams_distribute_parallel_for_simd: { 6847 Expr *ThreadLimit = nullptr; 6848 Expr *NumThreads = nullptr; 6849 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6850 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6851 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6852 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6853 if (auto Constant = 6854 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6855 DefaultVal = Constant->getExtValue(); 6856 } 6857 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6858 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6859 NumThreads = NumThreadsClause->getNumThreads(); 6860 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6861 if (auto Constant = 6862 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6863 if (Constant->getExtValue() < DefaultVal) { 6864 DefaultVal = Constant->getExtValue(); 6865 ThreadLimit = NumThreads; 6866 } 6867 } 6868 } 6869 } 6870 return ThreadLimit; 6871 } 6872 case OMPD_target_teams_distribute_simd: 6873 case OMPD_target_simd: 6874 DefaultVal = 1; 6875 return nullptr; 6876 case OMPD_parallel: 6877 case OMPD_for: 6878 case OMPD_parallel_for: 6879 case OMPD_parallel_master: 6880 case OMPD_parallel_sections: 6881 case OMPD_for_simd: 6882 case OMPD_parallel_for_simd: 6883 case OMPD_cancel: 6884 case OMPD_cancellation_point: 6885 case OMPD_ordered: 6886 case OMPD_threadprivate: 6887 case OMPD_allocate: 6888 case OMPD_task: 6889 case OMPD_simd: 6890 case OMPD_tile: 6891 case OMPD_unroll: 6892 case OMPD_sections: 6893 case OMPD_section: 6894 case OMPD_single: 6895 case OMPD_master: 6896 case OMPD_critical: 6897 case OMPD_taskyield: 6898 case OMPD_barrier: 6899 case OMPD_taskwait: 6900 case OMPD_taskgroup: 6901 case OMPD_atomic: 6902 case OMPD_flush: 6903 case OMPD_depobj: 6904 case OMPD_scan: 6905 case OMPD_teams: 6906 case OMPD_target_data: 6907 case OMPD_target_exit_data: 6908 case OMPD_target_enter_data: 6909 case OMPD_distribute: 6910 case OMPD_distribute_simd: 6911 case OMPD_distribute_parallel_for: 6912 case OMPD_distribute_parallel_for_simd: 6913 case OMPD_teams_distribute: 6914 case OMPD_teams_distribute_simd: 6915 case OMPD_teams_distribute_parallel_for: 6916 case OMPD_teams_distribute_parallel_for_simd: 6917 case OMPD_target_update: 6918 case 
OMPD_declare_simd: 6919 case OMPD_declare_variant: 6920 case OMPD_begin_declare_variant: 6921 case OMPD_end_declare_variant: 6922 case OMPD_declare_target: 6923 case OMPD_end_declare_target: 6924 case OMPD_declare_reduction: 6925 case OMPD_declare_mapper: 6926 case OMPD_taskloop: 6927 case OMPD_taskloop_simd: 6928 case OMPD_master_taskloop: 6929 case OMPD_master_taskloop_simd: 6930 case OMPD_parallel_master_taskloop: 6931 case OMPD_parallel_master_taskloop_simd: 6932 case OMPD_requires: 6933 case OMPD_unknown: 6934 break; 6935 default: 6936 break; 6937 } 6938 llvm_unreachable("Unsupported directive kind."); 6939 } 6940 6941 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 6942 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6943 assert(!CGF.getLangOpts().OpenMPIsDevice && 6944 "Clauses associated with the teams directive expected to be emitted " 6945 "only for the host!"); 6946 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6947 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6948 "Expected target-based executable directive."); 6949 CGBuilderTy &Bld = CGF.Builder; 6950 llvm::Value *ThreadLimitVal = nullptr; 6951 llvm::Value *NumThreadsVal = nullptr; 6952 switch (DirectiveKind) { 6953 case OMPD_target: { 6954 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6955 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6956 return NumThreads; 6957 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6958 CGF.getContext(), CS->getCapturedStmt()); 6959 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6960 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6961 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6962 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6963 const auto *ThreadLimitClause = 6964 Dir->getSingleClause<OMPThreadLimitClause>(); 6965 CodeGenFunction::LexicalScope Scope( 6966 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6967 if (const auto *PreInit = 6968 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6969 for (const auto *I : PreInit->decls()) { 6970 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6971 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6972 } else { 6973 CodeGenFunction::AutoVarEmission Emission = 6974 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6975 CGF.EmitAutoVarCleanups(Emission); 6976 } 6977 } 6978 } 6979 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6980 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6981 ThreadLimitVal = 6982 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6983 } 6984 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6985 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6986 CS = Dir->getInnermostCapturedStmt(); 6987 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6988 CGF.getContext(), CS->getCapturedStmt()); 6989 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6990 } 6991 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6992 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6993 CS = Dir->getInnermostCapturedStmt(); 6994 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6995 return NumThreads; 6996 } 6997 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6998 return Bld.getInt32(1); 6999 } 7000 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If an if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
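    // Worked example (a sketch): for '#pragma omp target parallel if(C)
    // num_threads(NT) thread_limit(TL)' the emitted value is
    //   C ? min(NT, TL) : 1
    // and it degenerates to 0 ("let the runtime decide") when neither NT nor
    // TL is given.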
7043 if (D.hasClausesOfKind<OMPIfClause>()) { 7044 const OMPIfClause *IfClause = nullptr; 7045 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7046 if (C->getNameModifier() == OMPD_unknown || 7047 C->getNameModifier() == OMPD_parallel) { 7048 IfClause = C; 7049 break; 7050 } 7051 } 7052 if (IfClause) { 7053 const Expr *Cond = IfClause->getCondition(); 7054 bool Result; 7055 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7056 if (!Result) 7057 return Bld.getInt32(1); 7058 } else { 7059 CodeGenFunction::RunCleanupsScope Scope(CGF); 7060 CondVal = CGF.EvaluateExprAsBool(Cond); 7061 } 7062 } 7063 } 7064 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7065 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7066 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7067 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7068 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7069 ThreadLimitVal = 7070 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7071 } 7072 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7073 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7074 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7075 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7076 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7077 NumThreadsVal = 7078 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7079 ThreadLimitVal = ThreadLimitVal 7080 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7081 ThreadLimitVal), 7082 NumThreadsVal, ThreadLimitVal) 7083 : NumThreadsVal; 7084 } 7085 if (!ThreadLimitVal) 7086 ThreadLimitVal = Bld.getInt32(0); 7087 if (CondVal) 7088 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7089 return ThreadLimitVal; 7090 } 7091 case OMPD_target_teams_distribute_simd: 7092 case OMPD_target_simd: 7093 return Bld.getInt32(1); 7094 case OMPD_parallel: 7095 case OMPD_for: 7096 case OMPD_parallel_for: 7097 case OMPD_parallel_master: 7098 case OMPD_parallel_sections: 7099 case OMPD_for_simd: 7100 case OMPD_parallel_for_simd: 7101 case OMPD_cancel: 7102 case OMPD_cancellation_point: 7103 case OMPD_ordered: 7104 case OMPD_threadprivate: 7105 case OMPD_allocate: 7106 case OMPD_task: 7107 case OMPD_simd: 7108 case OMPD_tile: 7109 case OMPD_unroll: 7110 case OMPD_sections: 7111 case OMPD_section: 7112 case OMPD_single: 7113 case OMPD_master: 7114 case OMPD_critical: 7115 case OMPD_taskyield: 7116 case OMPD_barrier: 7117 case OMPD_taskwait: 7118 case OMPD_taskgroup: 7119 case OMPD_atomic: 7120 case OMPD_flush: 7121 case OMPD_depobj: 7122 case OMPD_scan: 7123 case OMPD_teams: 7124 case OMPD_target_data: 7125 case OMPD_target_exit_data: 7126 case OMPD_target_enter_data: 7127 case OMPD_distribute: 7128 case OMPD_distribute_simd: 7129 case OMPD_distribute_parallel_for: 7130 case OMPD_distribute_parallel_for_simd: 7131 case OMPD_teams_distribute: 7132 case OMPD_teams_distribute_simd: 7133 case OMPD_teams_distribute_parallel_for: 7134 case OMPD_teams_distribute_parallel_for_simd: 7135 case OMPD_target_update: 7136 case OMPD_declare_simd: 7137 case OMPD_declare_variant: 7138 case OMPD_begin_declare_variant: 7139 case OMPD_end_declare_variant: 7140 case OMPD_declare_target: 7141 case OMPD_end_declare_target: 7142 case OMPD_declare_reduction: 7143 case OMPD_declare_mapper: 7144 case OMPD_taskloop: 7145 case OMPD_taskloop_simd: 7146 case OMPD_master_taskloop: 7147 case OMPD_master_taskloop_simd: 7148 case OMPD_parallel_master_taskloop: 7149 
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
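  /// For example, with OMP_MAP_MEMBER_OF == 0xffff000000000000 the lowest set
  /// bit is bit 48, so the returned offset is 48 and getMemberOfFlag(0) below
  /// encodes MEMBER_OF(1) as 1ULL << 48.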
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
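    /// A typical use (sketch) is to accumulate the info generated for each
    /// capture into the final set of runtime arguments:
    /// \code
    ///   MapCombinedInfoTy CombinedInfo, CurInfo;
    ///   // ... fill CurInfo for one capture ...
    ///   CombinedInfo.append(CurInfo);
    /// \endcode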
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
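  /// For example, for '#pragma omp target map(to : s)' this holds the
  /// OMPExecutableDirective; inside a '#pragma omp declare mapper' it holds
  /// the OMPDeclareMapperDecl.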
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If the expression has neither a length nor a lower bound, we are
      // mapping the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits alloc/release that is what the runtime
      // is going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
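  /// For instance, 'a[0:n]' is final (n cannot be proved to be one), while
  /// 'a[i:1]' (constant length of one) and 'a[i]' (no colon) are not.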
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (**) allocate contiguous space needed to fit all mapped members even if
    //      we allocate space for members not mapped (in this example,
    //      s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //      them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
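    // For example, in map(s.p[:22]) the entry generated for the pointee
    // section is not the first one for the expression, so getMapTypeBits
    // below adds the PTR_AND_OBJ flag to it.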
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2) which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. We have to treat
      // array sections specially given that they are built-in types.
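      // For example, the type of 'p[0:n]' is the element type, so we have to
      // inspect the base's original type to learn that 'p' is a pointer.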
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
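        // For instance, for map(s.p[:22]) no standalone entry is emitted for
        // the pointer s.p itself; it is folded into the PTR_AND_OBJ entry of
        // the pointee section (see the examples at the top of this function).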
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit the map entries for the non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty,
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
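        // For example, mapping both s.i (field index 0) and s.f (field index
        // 1) widens the recorded range so the combined entry eventually spans
        // from &s.i to the end of s.f.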
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we never encountered a member expression, the list maps the whole
    // record - allocate space for the complete record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last one.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Note that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims; instead, they
    // equal the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections. However, for a
        // case like arr[0:2][2], Clang constructs the inner dimension as an
        // array section even though it is not in array section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0     1          4   (int)   <- dummy dimension
      //    D1          0     2          8   (2 * (1) * 4)
      //    D2          1     2         20   (1 * (1 * 5) * 4)
      //    D3          0     2        200   (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A firstprivate variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the
    // captured declaration is known as first-private in this handler.
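    // For example, with '#pragma omp target firstprivate(x)', a scalar x
    // captured by reference yields PRIVATE | TO below, while a pointer
    // yields TO | PTR_AND_OBJ.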
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the position left by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                llvm::None, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(),
                std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as array subscript, array
            // section or array shaping. The base address is passed as a
            // pointer to base in this case and cannot be used as a base for
            // use_device_ptr list item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime()
                      .hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) ||
                  !VarD || VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing
    // map entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ |
                                    OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
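    // An allocator with traits firstprivatizes its traits variable;
    // otherwise the allocator variable itself (if any) is firstprivatized.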
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped
  /// struct and take care of the mapping flags of the arguments corresponding
  /// to individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
        HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is TARGET_PARAM only when generating info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it
    // won't be unmapped by an extra dynamic reference count decrement. Add it
    // to all elements as well so the runtime knows which reference count to
    // check when determining whether it's time for device-to-host transfers
    // of individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry (except
    // for PTR_AND_OBJ entries which do not have a placeholder value 0xFFFF
    // in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
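  /// For instance, given (a sketch)
  /// \code
  /// int X = 0;
  /// auto L = [&X]() { X += 1; };
  /// #pragma omp target map(to: L)
  /// L();
  /// \endcode
  /// an extra PTR_AND_OBJ entry is emitted for the captured reference to X,
  /// so the pointer stored in the device copy of the lambda can be rewritten
  /// to the device address of X.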
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
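      // Only the entries emitted by generateInfoForLambdaCaptures carry
      // exactly this flag combination; everything else is left untouched.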
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // The lists overlap if, for at least one of them, we reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap. Same if one component
          // is a base and another component is a dereferenced pointer
          // memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
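    // Component lists already handled above are found in OverlappedData and
    // skipped here.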
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
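      // The actual flags come from getMapModifiersForPrivateClauses above:
      // PRIVATE|TO for firstprivatized captures, TO|FROM otherwise.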
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components; however, the size of offset, count, and stride is
  // equal to the size of the base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if
// possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
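/// For a region with N mapped components the emitted temporaries follow the
/// pattern below (a sketch; the sizes and map types may instead be constant
/// globals when all values are known at compile time):
/// \code
/// void *.offload_baseptrs[N];   // base of each mapped item
/// void *.offload_ptrs[N];       // begin of each mapped section
/// int64_t .offload_sizes[N];    // section sizes in bytes
/// int64_t .offload_maptypes[N]; // OMP_MAP_* flag words
/// void *.offload_mappers[N];    // user-defined mapper functions or null
/// \endcode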
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map names array is only built if debug information is requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the
    // end of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(),
                    Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the map names array if debug information is requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization.
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin,
///                       c.arg_size, c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
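  // The six parameters mirror the mapper signature shown in the \c \\code
  // block above: handle, base, begin, size, map type, and map name.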
9882 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9883 C.VoidPtrTy, ImplicitParamDecl::Other); 9884 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9885 ImplicitParamDecl::Other); 9886 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9887 C.VoidPtrTy, ImplicitParamDecl::Other); 9888 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9889 ImplicitParamDecl::Other); 9890 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9891 ImplicitParamDecl::Other); 9892 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9893 ImplicitParamDecl::Other); 9894 FunctionArgList Args; 9895 Args.push_back(&HandleArg); 9896 Args.push_back(&BaseArg); 9897 Args.push_back(&BeginArg); 9898 Args.push_back(&SizeArg); 9899 Args.push_back(&TypeArg); 9900 Args.push_back(&NameArg); 9901 const CGFunctionInfo &FnInfo = 9902 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9903 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9904 SmallString<64> TyStr; 9905 llvm::raw_svector_ostream Out(TyStr); 9906 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9907 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9908 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9909 Name, &CGM.getModule()); 9910 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9911 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9912 // Start the mapper function code generation. 9913 CodeGenFunction MapperCGF(CGM); 9914 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9915 // Compute the starting and end addresses of array elements. 9916 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9917 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9918 C.getPointerType(Int64Ty), Loc); 9919 // Prepare common arguments for array initiation and deletion. 9920 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9921 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9922 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9923 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9924 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9925 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9926 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9927 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9928 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9929 // Convert the size in bytes into the number of array elements. 9930 Size = MapperCGF.Builder.CreateExactUDiv( 9931 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9932 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9933 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 9934 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size); 9935 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9936 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9937 C.getPointerType(Int64Ty), Loc); 9938 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 9939 MapperCGF.GetAddrOfLocalVar(&NameArg), 9940 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9941 9942 // Emit array initiation if this is an array section and \p MapType indicates 9943 // that memory allocation is required. 
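// Per the scheme in the function-level comment above, the guard emitted for
// the init case is equivalent to:
//   (size > 1 || (base != begin && maptype.IsPtrAndObj)) && !maptype.IsDelete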
9944 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9945 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9946 MapName, ElementSize, HeadBB, /*IsInit=*/true); 9947 9948 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9949 9950 // Emit the loop header block. 9951 MapperCGF.EmitBlock(HeadBB); 9952 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9953 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9954 // Evaluate whether the initial condition is satisfied. 9955 llvm::Value *IsEmpty = 9956 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9957 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9958 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9959 9960 // Emit the loop body block. 9961 MapperCGF.EmitBlock(BodyBB); 9962 llvm::BasicBlock *LastBB = BodyBB; 9963 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9964 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9965 PtrPHI->addIncoming(PtrBegin, EntryBB); 9966 Address PtrCurrent(PtrPHI, ElemTy, 9967 MapperCGF.GetAddrOfLocalVar(&BeginArg) 9968 .getAlignment() 9969 .alignmentOfArrayElement(ElementSize)); 9970 // Privatize the declared variable of mapper to be the current array element. 9971 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9972 Scope.addPrivate(MapperVarDecl, PtrCurrent); 9973 (void)Scope.Privatize(); 9974 9975 // Get map clause information. Fill up the arrays with all mapped variables. 9976 MappableExprsHandler::MapCombinedInfoTy Info; 9977 MappableExprsHandler MEHandler(*D, MapperCGF); 9978 MEHandler.generateAllInfoForMapper(Info); 9979 9980 // Call the runtime API __tgt_mapper_num_components to get the number of 9981 // pre-existing components. 9982 llvm::Value *OffloadingArgs[] = {Handle}; 9983 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9984 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9985 OMPRTL___tgt_mapper_num_components), 9986 OffloadingArgs); 9987 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9988 PreviousSize, 9989 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9990 9991 // Fill up the runtime mapper handle for all components. 9992 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9993 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9994 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9995 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9996 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9997 llvm::Value *CurSizeArg = Info.Sizes[I]; 9998 llvm::Value *CurNameArg = 9999 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 10000 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 10001 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 10002 10003 // Extract the MEMBER_OF field from the map type. 10004 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 10005 llvm::Value *MemberMapType = 10006 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 10007 10008 // Combine the map type inherited from user-defined mapper with that 10009 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 10010 // bits of the \a MapType, which is the input argument of the mapper 10011 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 10012 // bits of MemberMapType. 10013 // [OpenMP 5.0], 1.2.6. map-type decay. 
10014 //        | alloc |  to   | from  | tofrom | release | delete
10015 // ----------------------------------------------------------
10016 // alloc  | alloc | alloc | alloc | alloc  | release | delete
10017 // to     | alloc |  to   | alloc |   to   | release | delete
10018 // from   | alloc | alloc | from  |  from  | release | delete
10019 // tofrom | alloc |  to   | from  | tofrom | release | delete
10020 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10021 MapType,
10022 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10023 MappableExprsHandler::OMP_MAP_FROM));
10024 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10025 llvm::BasicBlock *AllocElseBB =
10026 MapperCGF.createBasicBlock("omp.type.alloc.else");
10027 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10028 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10029 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10030 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10031 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10032 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10033 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10034 MapperCGF.EmitBlock(AllocBB);
10035 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10036 MemberMapType,
10037 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10038 MappableExprsHandler::OMP_MAP_FROM)));
10039 MapperCGF.Builder.CreateBr(EndBB);
10040 MapperCGF.EmitBlock(AllocElseBB);
10041 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10042 LeftToFrom,
10043 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10044 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10045 // In case of to, clear OMP_MAP_FROM.
10046 MapperCGF.EmitBlock(ToBB);
10047 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10048 MemberMapType,
10049 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10050 MapperCGF.Builder.CreateBr(EndBB);
10051 MapperCGF.EmitBlock(ToElseBB);
10052 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10053 LeftToFrom,
10054 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10055 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10056 // In case of from, clear OMP_MAP_TO.
10057 MapperCGF.EmitBlock(FromBB);
10058 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10059 MemberMapType,
10060 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10061 // In case of tofrom, do nothing.
10062 MapperCGF.EmitBlock(EndBB);
10063 LastBB = EndBB;
10064 llvm::PHINode *CurMapType =
10065 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10066 CurMapType->addIncoming(AllocMapType, AllocBB);
10067 CurMapType->addIncoming(ToMapType, ToBB);
10068 CurMapType->addIncoming(FromMapType, FromBB);
10069 CurMapType->addIncoming(MemberMapType, ToElseBB);
10070
10071 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
10072 CurSizeArg, CurMapType, CurNameArg};
10073 if (Info.Mappers[I]) {
10074 // Call the corresponding mapper function.
10075 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10076 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10077 assert(MapperFunc && "Expected a valid mapper function to be available.");
10078 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10079 } else {
10080 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10081 // data structure.
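// The runtime entry point is assumed to have the following shape, matching
// the six arguments assembled above (a sketch, not a normative declaration):
// \code
// void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
//                                  void *begin, int64_t size, int64_t type,
//                                  void *name);
// \endcode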
10082 MapperCGF.EmitRuntimeCall(
10083 OMPBuilder.getOrCreateRuntimeFunction(
10084 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10085 OffloadingArgs);
10086 }
10087 }
10088
10089 // Update the pointer to point to the next element that needs to be mapped,
10090 // and check whether we have mapped all elements.
10091 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10092 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10093 PtrPHI->addIncoming(PtrNext, LastBB);
10094 llvm::Value *IsDone =
10095 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10096 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10097 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10098
10099 MapperCGF.EmitBlock(ExitBB);
10100 // Emit array deletion if this is an array section and \p MapType indicates
10101 // that deletion is required.
10102 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10103 MapName, ElementSize, DoneBB, /*IsInit=*/false);
10104
10105 // Emit the function exit block.
10106 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10107 MapperCGF.FinishFunction();
10108 UDMMap.try_emplace(D, Fn);
10109 if (CGF) {
10110 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10111 Decls.second.push_back(D);
10112 }
10113 }
10114
10115 /// Emit the array initialization or deletion portion for user-defined mapper
10116 /// code generation. First, it evaluates whether an array section is mapped and
10117 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10118 /// true, and \a MapType indicates to not delete this array, array
10119 /// initialization code is generated. If \a IsInit is false, and \a MapType
10120 /// indicates to delete this array, array deletion code is generated.
10121 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10122 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10123 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10124 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10125 bool IsInit) {
10126 StringRef Prefix = IsInit ? ".init" : ".del";
10127
10128 // Evaluate if this is an array section.
10129 llvm::BasicBlock *BodyBB =
10130 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10131 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10132 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10133 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10134 MapType,
10135 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10136 llvm::Value *DeleteCond;
10137 llvm::Value *Cond;
10138 if (IsInit) {
10139 // base != begin?
10140 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10141 // IsPtrAndObj?
10142 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10143 MapType, 10144 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10145 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10146 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10147 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10148 DeleteCond = MapperCGF.Builder.CreateIsNull( 10149 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10150 } else { 10151 Cond = IsArray; 10152 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10153 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10154 } 10155 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10156 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10157 10158 MapperCGF.EmitBlock(BodyBB); 10159 // Get the array size by multiplying element size and element number (i.e., \p 10160 // Size). 10161 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10162 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10163 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10164 // memory allocation/deletion purpose only. 10165 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10166 MapType, 10167 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10168 MappableExprsHandler::OMP_MAP_FROM))); 10169 MapTypeArg = MapperCGF.Builder.CreateOr( 10170 MapTypeArg, 10171 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10172 10173 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10174 // data structure. 10175 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10176 ArraySize, MapTypeArg, MapName}; 10177 MapperCGF.EmitRuntimeCall( 10178 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10179 OMPRTL___tgt_push_mapper_component), 10180 OffloadingArgs); 10181 } 10182 10183 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10184 const OMPDeclareMapperDecl *D) { 10185 auto I = UDMMap.find(D); 10186 if (I != UDMMap.end()) 10187 return I->second; 10188 emitUserDefinedMapper(D); 10189 return UDMMap.lookup(D); 10190 } 10191 10192 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10193 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10194 llvm::Value *DeviceID, 10195 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10196 const OMPLoopDirective &D)> 10197 SizeEmitter) { 10198 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10199 const OMPExecutableDirective *TD = &D; 10200 // Get nested teams distribute kind directive, if any. 
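// For example (a sketch), given
// \code
// #pragma omp target
// #pragma omp teams distribute
// for (int i = 0; i < N; ++i) ...
// \endcode
// the distribute directive is located in the nest and its tripcount (N here)
// is evaluated on the host and pushed to the runtime by the code below.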
10201 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10202 TD = getNestedDistributeDirective(CGM.getContext(), D); 10203 if (!TD) 10204 return; 10205 const auto *LD = cast<OMPLoopDirective>(TD); 10206 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10207 PrePostActionTy &) { 10208 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10209 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10210 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10211 CGF.EmitRuntimeCall( 10212 OMPBuilder.getOrCreateRuntimeFunction( 10213 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10214 Args); 10215 } 10216 }; 10217 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10218 } 10219 10220 void CGOpenMPRuntime::emitTargetCall( 10221 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10222 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10223 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10224 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10225 const OMPLoopDirective &D)> 10226 SizeEmitter) { 10227 if (!CGF.HaveInsertPoint()) 10228 return; 10229 10230 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10231 CGM.getLangOpts().OpenMPOffloadMandatory; 10232 10233 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10234 10235 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10236 D.hasClausesOfKind<OMPNowaitClause>(); 10237 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10238 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10239 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10240 PrePostActionTy &) { 10241 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10242 }; 10243 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10244 10245 CodeGenFunction::OMPTargetDataInfo InputInfo; 10246 llvm::Value *MapTypesArray = nullptr; 10247 llvm::Value *MapNamesArray = nullptr; 10248 // Generate code for the host fallback function. 10249 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, 10250 &CS, OffloadingMandatory](CodeGenFunction &CGF) { 10251 if (OffloadingMandatory) { 10252 CGF.Builder.CreateUnreachable(); 10253 } else { 10254 if (RequiresOuterTask) { 10255 CapturedVars.clear(); 10256 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10257 } 10258 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10259 } 10260 }; 10261 // Fill up the pointer arrays and transfer execution to the device. 10262 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, 10263 &MapNamesArray, SizeEmitter, 10264 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10265 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10266 // Reverse offloading is not supported, so just execute on the host. 10267 FallbackGen(CGF); 10268 return; 10269 } 10270 10271 // On top of the arrays that were filled up, the target offloading call 10272 // takes as arguments the device id as well as the host pointer. The host 10273 // pointer is used by the runtime library to identify the current target 10274 // region, so it only has to be unique and not necessarily point to 10275 // anything. 
It could be the pointer to the outlined function that
10276 // implements the target region, but we do not use it, so the compiler does
10277 // not need to keep it alive and can therefore inline the host function if
10278 // that proves worthwhile during optimization.
10279
10280 // From this point on, we need to have an ID of the target region defined.
10281 assert(OutlinedFnID && "Invalid outlined function ID!");
10282 (void)OutlinedFnID;
10283
10284 // Emit device ID if any.
10285 llvm::Value *DeviceID;
10286 if (Device.getPointer()) {
10287 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10288 Device.getInt() == OMPC_DEVICE_device_num) &&
10289 "Expected device_num modifier.");
10290 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10291 DeviceID =
10292 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10293 } else {
10294 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10295 }
10296
10297 // Emit the number of elements in the offloading arrays.
10298 llvm::Value *PointerNum =
10299 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10300
10301 // Return value of the runtime offloading call.
10302 llvm::Value *Return;
10303
10304 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10305 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10306
10307 // Source location for the ident struct.
10308 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10309
10310 // Emit tripcount for the target loop-based directive.
10311 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10312
10313 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10314 // The target region is an outlined function launched by the runtime
10315 // via calls to __tgt_target() or __tgt_target_teams().
10316 //
10317 // __tgt_target() launches a target region with one team and one thread,
10318 // executing a serial region. This master thread may in turn launch
10319 // more threads within its team upon encountering a parallel region;
10320 // however, no additional teams can be launched on the device.
10321 //
10322 // __tgt_target_teams() launches a target region with one or more teams,
10323 // each with one or more threads. This call is required for target
10324 // constructs such as:
10325 //   'target teams'
10326 //   'target' / 'teams'
10327 //   'target teams distribute parallel for'
10328 //   'target parallel'
10329 // and so on.
10330 //
10331 // Note that on the host and CPU targets, the runtime implementation of
10332 // these calls simply calls the outlined function without forking threads.
10333 // The outlined functions themselves have runtime calls to
10334 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10335 // the compiler in emitTeamsCall() and emitParallelCall().
10336 //
10337 // In contrast, on the NVPTX target, the implementation of
10338 // __tgt_target_teams() launches a GPU kernel with the requested number
10339 // of teams and threads, so no additional calls to the runtime are required.
10340 if (NumTeams) {
10341 // If we have NumTeams defined this means that we have an enclosed teams
10342 // region. Therefore we also expect to have NumThreads defined. These two
10343 // values should be defined in the presence of a teams directive,
10344 // regardless of having any clauses associated. If the user is using teams
10345 // but no clauses, these two values will be the defaults that should be
10346 // passed to the runtime library - a 32-bit integer with the value zero.
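// For reference, the teams variant of the launch is assumed to match the
// argument list assembled below (a sketch of the runtime interface):
// \code
// i32 __tgt_target_teams_mapper(ident_t *loc, i64 device_id, void *host_ptr,
//                               i32 arg_num, void **args_base, void **args,
//                               i64 *arg_sizes, i64 *arg_types,
//                               void **arg_names, void **arg_mappers,
//                               i32 num_teams, i32 thread_limit);
// \endcode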
10347 assert(NumThreads && "Thread limit expression should be available along " 10348 "with number of teams."); 10349 SmallVector<llvm::Value *> OffloadingArgs = { 10350 RTLoc, 10351 DeviceID, 10352 OutlinedFnID, 10353 PointerNum, 10354 InputInfo.BasePointersArray.getPointer(), 10355 InputInfo.PointersArray.getPointer(), 10356 InputInfo.SizesArray.getPointer(), 10357 MapTypesArray, 10358 MapNamesArray, 10359 InputInfo.MappersArray.getPointer(), 10360 NumTeams, 10361 NumThreads}; 10362 if (HasNowait) { 10363 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10364 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10365 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10366 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10367 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10368 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10369 } 10370 Return = CGF.EmitRuntimeCall( 10371 OMPBuilder.getOrCreateRuntimeFunction( 10372 CGM.getModule(), HasNowait 10373 ? OMPRTL___tgt_target_teams_nowait_mapper 10374 : OMPRTL___tgt_target_teams_mapper), 10375 OffloadingArgs); 10376 } else { 10377 SmallVector<llvm::Value *> OffloadingArgs = { 10378 RTLoc, 10379 DeviceID, 10380 OutlinedFnID, 10381 PointerNum, 10382 InputInfo.BasePointersArray.getPointer(), 10383 InputInfo.PointersArray.getPointer(), 10384 InputInfo.SizesArray.getPointer(), 10385 MapTypesArray, 10386 MapNamesArray, 10387 InputInfo.MappersArray.getPointer()}; 10388 if (HasNowait) { 10389 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10390 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10391 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10392 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10393 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10394 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10395 } 10396 Return = CGF.EmitRuntimeCall( 10397 OMPBuilder.getOrCreateRuntimeFunction( 10398 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10399 : OMPRTL___tgt_target_mapper), 10400 OffloadingArgs); 10401 } 10402 10403 // Check the error code and execute the host version if required. 10404 llvm::BasicBlock *OffloadFailedBlock = 10405 CGF.createBasicBlock("omp_offload.failed"); 10406 llvm::BasicBlock *OffloadContBlock = 10407 CGF.createBasicBlock("omp_offload.cont"); 10408 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10409 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10410 10411 CGF.EmitBlock(OffloadFailedBlock); 10412 FallbackGen(CGF); 10413 10414 CGF.EmitBranch(OffloadContBlock); 10415 10416 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10417 }; 10418 10419 // Notify that the host version must be executed. 10420 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10421 FallbackGen(CGF); 10422 }; 10423 10424 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10425 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10426 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10427 // Fill up the arrays with all the captured variables. 10428 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10429 10430 // Get mappable expression information. 
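// Sketch of the data flow: for each capture, the handler below appends
// parallel entries to CurInfo (Exprs, BasePointers, Pointers, Sizes, Types,
// Mappers); the asserts further down rely on these arrays staying the same
// length.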
10431 MappableExprsHandler MEHandler(D, CGF); 10432 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10433 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10434 10435 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10436 auto *CV = CapturedVars.begin(); 10437 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10438 CE = CS.capture_end(); 10439 CI != CE; ++CI, ++RI, ++CV) { 10440 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10441 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10442 10443 // VLA sizes are passed to the outlined region by copy and do not have map 10444 // information associated. 10445 if (CI->capturesVariableArrayType()) { 10446 CurInfo.Exprs.push_back(nullptr); 10447 CurInfo.BasePointers.push_back(*CV); 10448 CurInfo.Pointers.push_back(*CV); 10449 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10450 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10451 // Copy to the device as an argument. No need to retrieve it. 10452 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10453 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10454 MappableExprsHandler::OMP_MAP_IMPLICIT); 10455 CurInfo.Mappers.push_back(nullptr); 10456 } else { 10457 // If we have any information in the map clause, we use it, otherwise we 10458 // just do a default mapping. 10459 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10460 if (!CI->capturesThis()) 10461 MappedVarSet.insert(CI->getCapturedVar()); 10462 else 10463 MappedVarSet.insert(nullptr); 10464 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10465 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10466 // Generate correct mapping for variables captured by reference in 10467 // lambdas. 10468 if (CI->capturesVariable()) 10469 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10470 CurInfo, LambdaPointers); 10471 } 10472 // We expect to have at least an element of information for this capture. 10473 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10474 "Non-existing map pointer for capture!"); 10475 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10476 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10477 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10478 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10479 "Inconsistent map information sizes!"); 10480 10481 // If there is an entry in PartialStruct it means we have a struct with 10482 // individual members mapped. Emit an extra combined entry. 10483 if (PartialStruct.Base.isValid()) { 10484 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10485 MEHandler.emitCombinedEntry( 10486 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10487 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10488 } 10489 10490 // We need to append the results of this capture to what we already have. 10491 CombinedInfo.append(CurInfo); 10492 } 10493 // Adjust MEMBER_OF flags for the lambdas captures. 10494 MEHandler.adjustMemberOfForLambdaCaptures( 10495 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10496 CombinedInfo.Types); 10497 // Map any list items in a map clause that were not captures because they 10498 // weren't referenced within the construct. 10499 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10500 10501 TargetDataInfo Info; 10502 // Fill up the arrays and create the arguments. 
10503 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10504 emitOffloadingArraysArgument(
10505 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10506 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10507 {/*ForEndCall=*/false});
10508
10509 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10510 InputInfo.BasePointersArray =
10511 Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10512 InputInfo.PointersArray =
10513 Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10514 InputInfo.SizesArray =
10515 Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10516 InputInfo.MappersArray =
10517 Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10518 MapTypesArray = Info.MapTypesArray;
10519 MapNamesArray = Info.MapNamesArray;
10520 if (RequiresOuterTask)
10521 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10522 else
10523 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10524 };
10525
10526 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10527 CodeGenFunction &CGF, PrePostActionTy &) {
10528 if (RequiresOuterTask) {
10529 CodeGenFunction::OMPTargetDataInfo InputInfo;
10530 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10531 } else {
10532 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10533 }
10534 };
10535
10536 // If we have a target function ID, it means that we need to support
10537 // offloading; otherwise, we just execute on the host. We need to execute on
10538 // the host regardless of the conditional in the if clause if, e.g., the user
10539 // does not specify target triples.
10540 if (OutlinedFnID) {
10541 if (IfCond) {
10542 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10543 } else {
10544 RegionCodeGenTy ThenRCG(TargetThenGen);
10545 ThenRCG(CGF);
10546 }
10547 } else {
10548 RegionCodeGenTy ElseRCG(TargetElseGen);
10549 ElseRCG(CGF);
10550 }
10551 }
10552
10553 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10554 StringRef ParentName) {
10555 if (!S)
10556 return;
10557
10558 // Codegen OMP target directives that offload compute to the device.
10559 bool RequiresDeviceCodegen =
10560 isa<OMPExecutableDirective>(S) &&
10561 isOpenMPTargetExecutionDirective(
10562 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10563
10564 if (RequiresDeviceCodegen) {
10565 const auto &E = *cast<OMPExecutableDirective>(S);
10566 unsigned DeviceID;
10567 unsigned FileID;
10568 unsigned Line;
10569 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10570 FileID, Line);
10571
10572 // Is this a target region that should not be emitted as an entry point? If
10573 // so just signal we are done with this target region.
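// (Sketch) Target region entries are keyed by the tuple gathered above,
//   (DeviceID, FileID, ParentName, Line),
// so re-scanning the same construct resolves to the same entry.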
10574 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10575 ParentName, Line)) 10576 return; 10577 10578 switch (E.getDirectiveKind()) { 10579 case OMPD_target: 10580 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10581 cast<OMPTargetDirective>(E)); 10582 break; 10583 case OMPD_target_parallel: 10584 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10585 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10586 break; 10587 case OMPD_target_teams: 10588 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10589 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10590 break; 10591 case OMPD_target_teams_distribute: 10592 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10593 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10594 break; 10595 case OMPD_target_teams_distribute_simd: 10596 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10597 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10598 break; 10599 case OMPD_target_parallel_for: 10600 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10601 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10602 break; 10603 case OMPD_target_parallel_for_simd: 10604 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10605 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10606 break; 10607 case OMPD_target_simd: 10608 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10609 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10610 break; 10611 case OMPD_target_teams_distribute_parallel_for: 10612 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10613 CGM, ParentName, 10614 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10615 break; 10616 case OMPD_target_teams_distribute_parallel_for_simd: 10617 CodeGenFunction:: 10618 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10619 CGM, ParentName, 10620 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10621 break; 10622 case OMPD_parallel: 10623 case OMPD_for: 10624 case OMPD_parallel_for: 10625 case OMPD_parallel_master: 10626 case OMPD_parallel_sections: 10627 case OMPD_for_simd: 10628 case OMPD_parallel_for_simd: 10629 case OMPD_cancel: 10630 case OMPD_cancellation_point: 10631 case OMPD_ordered: 10632 case OMPD_threadprivate: 10633 case OMPD_allocate: 10634 case OMPD_task: 10635 case OMPD_simd: 10636 case OMPD_tile: 10637 case OMPD_unroll: 10638 case OMPD_sections: 10639 case OMPD_section: 10640 case OMPD_single: 10641 case OMPD_master: 10642 case OMPD_critical: 10643 case OMPD_taskyield: 10644 case OMPD_barrier: 10645 case OMPD_taskwait: 10646 case OMPD_taskgroup: 10647 case OMPD_atomic: 10648 case OMPD_flush: 10649 case OMPD_depobj: 10650 case OMPD_scan: 10651 case OMPD_teams: 10652 case OMPD_target_data: 10653 case OMPD_target_exit_data: 10654 case OMPD_target_enter_data: 10655 case OMPD_distribute: 10656 case OMPD_distribute_simd: 10657 case OMPD_distribute_parallel_for: 10658 case OMPD_distribute_parallel_for_simd: 10659 case OMPD_teams_distribute: 10660 case OMPD_teams_distribute_simd: 10661 case OMPD_teams_distribute_parallel_for: 10662 case OMPD_teams_distribute_parallel_for_simd: 10663 case OMPD_target_update: 10664 case OMPD_declare_simd: 10665 case OMPD_declare_variant: 10666 case OMPD_begin_declare_variant: 10667 case OMPD_end_declare_variant: 10668 case OMPD_declare_target: 10669 case OMPD_end_declare_target: 10670 case OMPD_declare_reduction: 10671 case OMPD_declare_mapper: 10672 case OMPD_taskloop: 
10673 case OMPD_taskloop_simd:
10674 case OMPD_master_taskloop:
10675 case OMPD_master_taskloop_simd:
10676 case OMPD_parallel_master_taskloop:
10677 case OMPD_parallel_master_taskloop_simd:
10678 case OMPD_requires:
10679 case OMPD_metadirective:
10680 case OMPD_unknown:
10681 default:
10682 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10683 }
10684 return;
10685 }
10686
10687 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10688 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10689 return;
10690
10691 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10692 return;
10693 }
10694
10695 // If this is a lambda function, look into its body.
10696 if (const auto *L = dyn_cast<LambdaExpr>(S))
10697 S = L->getBody();
10698
10699 // Keep looking for target regions recursively.
10700 for (const Stmt *II : S->children())
10701 scanForTargetRegionsFunctions(II, ParentName);
10702 }
10703
10704 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10705 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10706 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10707 if (!DevTy)
10708 return false;
10709 // Do not emit device_type(nohost) functions for the host.
10710 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10711 return true;
10712 // Do not emit device_type(host) functions for the device.
10713 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10714 return true;
10715 return false;
10716 }
10717
10718 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10719 // If emitting code for the host, we do not process FD here. Instead we do
10720 // the normal code generation.
10721 if (!CGM.getLangOpts().OpenMPIsDevice) {
10722 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10723 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10724 CGM.getLangOpts().OpenMPIsDevice))
10725 return true;
10726 return false;
10727 }
10728
10729 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10730 // Try to detect target regions in the function.
10731 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10732 StringRef Name = CGM.getMangledName(GD);
10733 scanForTargetRegionsFunctions(FD->getBody(), Name);
10734 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10735 CGM.getLangOpts().OpenMPIsDevice))
10736 return true;
10737 }
10738
10739 // Do not emit the function if it is not marked as declare target.
10740 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10741 AlreadyEmittedTargetDecls.count(VD) == 0;
10742 }
10743
10744 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10745 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10746 CGM.getLangOpts().OpenMPIsDevice))
10747 return true;
10748
10749 if (!CGM.getLangOpts().OpenMPIsDevice)
10750 return false;
10751
10752 // Check if there are Ctors/Dtors in this declaration and look for target
10753 // regions in it. We use the complete variant to produce the kernel name
10754 // mangling.
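// For instance (a hedged sketch), for a declare target global of a type like
// \code
// struct S { S() { /* may contain a target region */ } };
// \endcode
// the constructor body is scanned under the mangled name of
// GlobalDecl(Ctor, Ctor_Complete), so nested device entries are keyed to the
// complete-object variant.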
10755 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10756 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10757 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10758 StringRef ParentName =
10759 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10760 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10761 }
10762 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10763 StringRef ParentName =
10764 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10765 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10766 }
10767 }
10768
10769 // Do not emit the variable if it is not marked as declare target.
10770 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10771 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10772 cast<VarDecl>(GD.getDecl()));
10773 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10774 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10775 HasRequiresUnifiedSharedMemory)) {
10776 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10777 return true;
10778 }
10779 return false;
10780 }
10781
10782 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10783 llvm::Constant *Addr) {
10784 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10785 !CGM.getLangOpts().OpenMPIsDevice)
10786 return;
10787
10788 // If we have host/nohost variables, they do not need to be registered.
10789 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10790 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10791 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10792 return;
10793
10794 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10795 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10796 if (!Res) {
10797 if (CGM.getLangOpts().OpenMPIsDevice) {
10798 // Register non-target variables being emitted in device code (debug info
10799 // may cause this).
10800 StringRef VarName = CGM.getMangledName(VD);
10801 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10802 }
10803 return;
10804 }
10805 // Register declare target variables.
10806 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10807 StringRef VarName;
10808 CharUnits VarSize;
10809 llvm::GlobalValue::LinkageTypes Linkage;
10810
10811 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10812 !HasRequiresUnifiedSharedMemory) {
10813 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10814 VarName = CGM.getMangledName(VD);
10815 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10816 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10817 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10818 } else {
10819 VarSize = CharUnits::Zero();
10820 }
10821 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10822 // Temporary solution to prevent optimization of the internal variables.
10823 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10824 // Do not create a "ref-variable" if the original is not also available
10825 // on the host.
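// Sketch of the shape produced below for an internal variable 'v' (the exact
// name separators come from getName() and may differ):
// \code
// @v.ref = internal constant T* @v  ; kept alive via llvm.compiler.used
// \endcode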
10826 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10827 return; 10828 std::string RefName = getName({VarName, "ref"}); 10829 if (!CGM.GetGlobalValue(RefName)) { 10830 llvm::Constant *AddrRef = 10831 getOrCreateInternalVariable(Addr->getType(), RefName); 10832 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10833 GVAddrRef->setConstant(/*Val=*/true); 10834 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10835 GVAddrRef->setInitializer(Addr); 10836 CGM.addCompilerUsedGlobal(GVAddrRef); 10837 } 10838 } 10839 } else { 10840 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10841 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10842 HasRequiresUnifiedSharedMemory)) && 10843 "Declare target attribute must link or to with unified memory."); 10844 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10845 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10846 else 10847 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10848 10849 if (CGM.getLangOpts().OpenMPIsDevice) { 10850 VarName = Addr->getName(); 10851 Addr = nullptr; 10852 } else { 10853 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10854 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10855 } 10856 VarSize = CGM.getPointerSize(); 10857 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10858 } 10859 10860 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10861 VarName, Addr, VarSize, Flags, Linkage); 10862 } 10863 10864 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10865 if (isa<FunctionDecl>(GD.getDecl()) || 10866 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10867 return emitTargetFunctions(GD); 10868 10869 return emitTargetGlobalVariable(GD); 10870 } 10871 10872 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10873 for (const VarDecl *VD : DeferredGlobalVariables) { 10874 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10875 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10876 if (!Res) 10877 continue; 10878 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10879 !HasRequiresUnifiedSharedMemory) { 10880 CGM.EmitGlobal(VD); 10881 } else { 10882 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10884 HasRequiresUnifiedSharedMemory)) && 10885 "Expected link clause or to clause with unified memory."); 10886 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10887 } 10888 } 10889 } 10890 10891 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10892 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10893 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10894 " Expected target-based directive."); 10895 } 10896 10897 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10898 for (const OMPClause *Clause : D->clauselists()) { 10899 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10900 HasRequiresUnifiedSharedMemory = true; 10901 } else if (const auto *AC = 10902 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10903 switch (AC->getAtomicDefaultMemOrderKind()) { 10904 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10905 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10906 break; 10907 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10908 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10909 break; 10910 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10911 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10912 break; 10913 case 
OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10914 break;
10915 }
10916 }
10917 }
10918 }
10919
10920 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10921 return RequiresAtomicOrdering;
10922 }
10923
10924 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10925 LangAS &AS) {
10926 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10927 return false;
10928 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10929 switch (A->getAllocatorType()) {
10930 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10931 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10932 // Not supported, fall back to the default mem space.
10933 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10934 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10935 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10936 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10937 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10938 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10939 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10940 AS = LangAS::Default;
10941 return true;
10942 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10943 llvm_unreachable("Expected predefined allocator for the variables with "
10944 "static storage.");
10945 }
10946 return false;
10947 }
10948
10949 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10950 return HasRequiresUnifiedSharedMemory;
10951 }
10952
10953 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10954 CodeGenModule &CGM)
10955 : CGM(CGM) {
10956 if (CGM.getLangOpts().OpenMPIsDevice) {
10957 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10958 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10959 }
10960 }
10961
10962 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10963 if (CGM.getLangOpts().OpenMPIsDevice)
10964 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10965 }
10966
10967 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10968 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10969 return true;
10970
10971 const auto *D = cast<FunctionDecl>(GD.getDecl());
10972 // Do not emit the function if it is marked as declare target, as it was
10973 // already emitted.
10974 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10975 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10976 if (auto *F = dyn_cast_or_null<llvm::Function>(
10977 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10978 return !F->isDeclaration();
10979 return false;
10980 }
10981 return true;
10982 }
10983
10984 return !AlreadyEmittedTargetDecls.insert(D).second;
10985 }
10986
10987 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10988 // If we don't have entries or if we are emitting code for the device, we
10989 // don't need to do anything.
10990 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10991 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10992 (OffloadEntriesInfoManager.empty() &&
10993 !HasEmittedDeclareTargetRegion &&
10994 !HasEmittedTargetRegion))
10995 return nullptr;
10996
10997 // Create and register the function that handles the requires directives.
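// A rough sketch of what is emitted below when only unified shared memory is
// required (the function name is built via getName() and is approximate):
// \code
// void omp_offloading.requires_reg() {
//   __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);
// }
// \endcode
// The caller is expected to register this function as a global constructor so
// that it runs before any offloading entry point is used.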
10998 ASTContext &C = CGM.getContext();
10999
11000 llvm::Function *RequiresRegFn;
11001 {
11002 CodeGenFunction CGF(CGM);
11003 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11004 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11005 std::string ReqName = getName({"omp_offloading", "requires_reg"});
11006 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11007 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11008 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11009 // TODO: check for other requires clauses.
11010 // The requires directive takes effect only when a target region is
11011 // present in the compilation unit. Otherwise it is ignored and not
11012 // passed to the runtime. This prevents the runtime from throwing an
11013 // error for mismatched requires clauses across compilation units that
11014 // don't contain at least one target region.
11015 assert((HasEmittedTargetRegion ||
11016 HasEmittedDeclareTargetRegion ||
11017 !OffloadEntriesInfoManager.empty()) &&
11018 "Target or declare target region expected.");
11019 if (HasRequiresUnifiedSharedMemory)
11020 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11021 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11022 CGM.getModule(), OMPRTL___tgt_register_requires),
11023 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11024 CGF.FinishFunction();
11025 }
11026 return RequiresRegFn;
11027 }
11028
11029 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11030 const OMPExecutableDirective &D,
11031 SourceLocation Loc,
11032 llvm::Function *OutlinedFn,
11033 ArrayRef<llvm::Value *> CapturedVars) {
11034 if (!CGF.HaveInsertPoint())
11035 return;
11036
11037 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11038 CodeGenFunction::RunCleanupsScope Scope(CGF);
11039
11040 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11041 llvm::Value *Args[] = {
11042 RTLoc,
11043 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11044 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11045 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11046 RealArgs.append(std::begin(Args), std::end(Args));
11047 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11048
11049 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11050 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11051 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11052 }
11053
11054 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11055 const Expr *NumTeams,
11056 const Expr *ThreadLimit,
11057 SourceLocation Loc) {
11058 if (!CGF.HaveInsertPoint())
11059 return;
11060
11061 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11062
11063 llvm::Value *NumTeamsVal =
11064 NumTeams
11065 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11066 CGF.CGM.Int32Ty, /* isSigned = */ true)
11067 : CGF.Builder.getInt32(0);
11068
11069 llvm::Value *ThreadLimitVal =
11070 ThreadLimit
11071 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11072 CGF.CGM.Int32Ty, /* isSigned = */ true)
11073 : CGF.Builder.getInt32(0);
11074
11075 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11076 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11077 ThreadLimitVal};
11078 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11079 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11080 PushNumTeamsArgs);
11081 }
11082
11083 void CGOpenMPRuntime::emitTargetDataCalls(
11084 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11085 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11086 if (!CGF.HaveInsertPoint())
11087 return;
11088
11089 // Action used to replace the default codegen action and turn privatization
11090 // off.
11091 PrePostActionTy NoPrivAction;
11092
11093 // Generate the code for the opening of the data environment. Capture all the
11094 // arguments of the runtime call by reference because they are used in the
11095 // closing of the region.
11096 auto &&BeginThenGen = [this, &D, Device, &Info,
11097 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11098 // Fill up the arrays with all the mapped variables.
11099 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11100
11101 // Get map clause information.
11102 MappableExprsHandler MEHandler(D, CGF);
11103 MEHandler.generateAllInfo(CombinedInfo);
11104
11105 // Fill up the arrays and create the arguments.
11106 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11107 /*IsNonContiguous=*/true);
11108
11109 llvm::Value *BasePointersArrayArg = nullptr;
11110 llvm::Value *PointersArrayArg = nullptr;
11111 llvm::Value *SizesArrayArg = nullptr;
11112 llvm::Value *MapTypesArrayArg = nullptr;
11113 llvm::Value *MapNamesArrayArg = nullptr;
11114 llvm::Value *MappersArrayArg = nullptr;
11115 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11116 SizesArrayArg, MapTypesArrayArg,
11117 MapNamesArrayArg, MappersArrayArg, Info);
11118
11119 // Emit device ID if any.
11120 llvm::Value *DeviceID = nullptr;
11121 if (Device) {
11122 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11123 CGF.Int64Ty, /*isSigned=*/true);
11124 } else {
11125 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11126 }
11127
11128 // Emit the number of elements in the offloading arrays.
11129 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11130
11131 // Source location for the ident struct.
11132 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11133
11134 llvm::Value *OffloadingArgs[] = {RTLoc,
11135 DeviceID,
11136 PointerNum,
11137 BasePointersArrayArg,
11138 PointersArrayArg,
11139 SizesArrayArg,
11140 MapTypesArrayArg,
11141 MapNamesArrayArg,
11142 MappersArrayArg};
11143 CGF.EmitRuntimeCall(
11144 OMPBuilder.getOrCreateRuntimeFunction(
11145 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11146 OffloadingArgs);
11147
11148 // If device pointer privatization is required, emit the body of the region
11149 // here. It will have to be duplicated: with and without privatization.
11150 if (!Info.CaptureDeviceAddrMap.empty())
11151 CodeGen(CGF);
11152 };
11153
11154 // Generate code for the closing of the data region.
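// (Sketch) For '#pragma omp target data map(tofrom : a)' the overall shape
// emitted by this function is:
//   __tgt_target_data_begin_mapper(...);  // BeginThenGen above
//   <region body>
//   __tgt_target_data_end_mapper(...);    // EndThenGen below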
11155 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11156 PrePostActionTy &) { 11157 assert(Info.isValid() && "Invalid data environment closing arguments."); 11158 11159 llvm::Value *BasePointersArrayArg = nullptr; 11160 llvm::Value *PointersArrayArg = nullptr; 11161 llvm::Value *SizesArrayArg = nullptr; 11162 llvm::Value *MapTypesArrayArg = nullptr; 11163 llvm::Value *MapNamesArrayArg = nullptr; 11164 llvm::Value *MappersArrayArg = nullptr; 11165 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11166 SizesArrayArg, MapTypesArrayArg, 11167 MapNamesArrayArg, MappersArrayArg, Info, 11168 {/*ForEndCall=*/true}); 11169 11170 // Emit device ID if any. 11171 llvm::Value *DeviceID = nullptr; 11172 if (Device) { 11173 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11174 CGF.Int64Ty, /*isSigned=*/true); 11175 } else { 11176 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11177 } 11178 11179 // Emit the number of elements in the offloading arrays. 11180 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11181 11182 // Source location for the ident struct 11183 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11184 11185 llvm::Value *OffloadingArgs[] = {RTLoc, 11186 DeviceID, 11187 PointerNum, 11188 BasePointersArrayArg, 11189 PointersArrayArg, 11190 SizesArrayArg, 11191 MapTypesArrayArg, 11192 MapNamesArrayArg, 11193 MappersArrayArg}; 11194 CGF.EmitRuntimeCall( 11195 OMPBuilder.getOrCreateRuntimeFunction( 11196 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11197 OffloadingArgs); 11198 }; 11199 11200 // If we need device pointer privatization, we need to emit the body of the 11201 // region with no privatization in the 'else' branch of the conditional. 11202 // Otherwise, we don't have to do anything. 11203 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11204 PrePostActionTy &) { 11205 if (!Info.CaptureDeviceAddrMap.empty()) { 11206 CodeGen.setAction(NoPrivAction); 11207 CodeGen(CGF); 11208 } 11209 }; 11210 11211 // We don't have to do anything to close the region if the if clause evaluates 11212 // to false. 11213 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11214 11215 if (IfCond) { 11216 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11217 } else { 11218 RegionCodeGenTy RCG(BeginThenGen); 11219 RCG(CGF); 11220 } 11221 11222 // If we don't require privatization of device pointers, we emit the body in 11223 // between the runtime calls. This avoids duplicating the body code. 11224 if (Info.CaptureDeviceAddrMap.empty()) { 11225 CodeGen.setAction(NoPrivAction); 11226 CodeGen(CGF); 11227 } 11228 11229 if (IfCond) { 11230 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11231 } else { 11232 RegionCodeGenTy RCG(EndThenGen); 11233 RCG(CGF); 11234 } 11235 } 11236 11237 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11238 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11239 const Expr *Device) { 11240 if (!CGF.HaveInsertPoint()) 11241 return; 11242 11243 assert((isa<OMPTargetEnterDataDirective>(D) || 11244 isa<OMPTargetExitDataDirective>(D) || 11245 isa<OMPTargetUpdateDirective>(D)) && 11246 "Expecting either target enter, exit data, or update directives."); 11247 11248 CodeGenFunction::OMPTargetDataInfo InputInfo; 11249 llvm::Value *MapTypesArray = nullptr; 11250 llvm::Value *MapNamesArray = nullptr; 11251 // Generate the code for the opening of the data environment. 
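// Note: the ThenGen lambda below serves all three standalone directives; the
// runtime entry point is selected by the switch inside it:
//   target enter data -> __tgt_target_data_begin{_nowait}_mapper
//   target exit data  -> __tgt_target_data_end{_nowait}_mapper
//   target update     -> __tgt_target_data_update{_nowait}_mapper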
11252 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11253 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11254 // Emit device ID if any. 11255 llvm::Value *DeviceID = nullptr; 11256 if (Device) { 11257 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11258 CGF.Int64Ty, /*isSigned=*/true); 11259 } else { 11260 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11261 } 11262 11263 // Emit the number of elements in the offloading arrays. 11264 llvm::Constant *PointerNum = 11265 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11266 11267 // Source location for the ident struct 11268 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11269 11270 llvm::Value *OffloadingArgs[] = {RTLoc, 11271 DeviceID, 11272 PointerNum, 11273 InputInfo.BasePointersArray.getPointer(), 11274 InputInfo.PointersArray.getPointer(), 11275 InputInfo.SizesArray.getPointer(), 11276 MapTypesArray, 11277 MapNamesArray, 11278 InputInfo.MappersArray.getPointer()}; 11279 11280 // Select the right runtime function call for each standalone 11281 // directive. 11282 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11283 RuntimeFunction RTLFn; 11284 switch (D.getDirectiveKind()) { 11285 case OMPD_target_enter_data: 11286 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11287 : OMPRTL___tgt_target_data_begin_mapper; 11288 break; 11289 case OMPD_target_exit_data: 11290 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11291 : OMPRTL___tgt_target_data_end_mapper; 11292 break; 11293 case OMPD_target_update: 11294 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11295 : OMPRTL___tgt_target_data_update_mapper; 11296 break; 11297 case OMPD_parallel: 11298 case OMPD_for: 11299 case OMPD_parallel_for: 11300 case OMPD_parallel_master: 11301 case OMPD_parallel_sections: 11302 case OMPD_for_simd: 11303 case OMPD_parallel_for_simd: 11304 case OMPD_cancel: 11305 case OMPD_cancellation_point: 11306 case OMPD_ordered: 11307 case OMPD_threadprivate: 11308 case OMPD_allocate: 11309 case OMPD_task: 11310 case OMPD_simd: 11311 case OMPD_tile: 11312 case OMPD_unroll: 11313 case OMPD_sections: 11314 case OMPD_section: 11315 case OMPD_single: 11316 case OMPD_master: 11317 case OMPD_critical: 11318 case OMPD_taskyield: 11319 case OMPD_barrier: 11320 case OMPD_taskwait: 11321 case OMPD_taskgroup: 11322 case OMPD_atomic: 11323 case OMPD_flush: 11324 case OMPD_depobj: 11325 case OMPD_scan: 11326 case OMPD_teams: 11327 case OMPD_target_data: 11328 case OMPD_distribute: 11329 case OMPD_distribute_simd: 11330 case OMPD_distribute_parallel_for: 11331 case OMPD_distribute_parallel_for_simd: 11332 case OMPD_teams_distribute: 11333 case OMPD_teams_distribute_simd: 11334 case OMPD_teams_distribute_parallel_for: 11335 case OMPD_teams_distribute_parallel_for_simd: 11336 case OMPD_declare_simd: 11337 case OMPD_declare_variant: 11338 case OMPD_begin_declare_variant: 11339 case OMPD_end_declare_variant: 11340 case OMPD_declare_target: 11341 case OMPD_end_declare_target: 11342 case OMPD_declare_reduction: 11343 case OMPD_declare_mapper: 11344 case OMPD_taskloop: 11345 case OMPD_taskloop_simd: 11346 case OMPD_master_taskloop: 11347 case OMPD_master_taskloop_simd: 11348 case OMPD_parallel_master_taskloop: 11349 case OMPD_parallel_master_taskloop_simd: 11350 case OMPD_target: 11351 case OMPD_target_simd: 11352 case OMPD_target_teams_distribute: 11353 case OMPD_target_teams_distribute_simd: 11354 case OMPD_target_teams_distribute_parallel_for: 11355 
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  // a) For a non-void function, the CDT is the return type.
  // b) If the function has any non-uniform, non-linear parameters, then the
  // CDT is the type of the first such parameter.
  // c) If the CDT determined by a) or b) above is a struct, union, or class
  // type which is passed by value (except for the type that maps to the
  // built-in complex data type), the characteristic data type is int.
  // d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types up to 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11641 /// TODO: Add support for references, section 3.2.1, item 1. 11642 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11643 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11644 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11645 if (getAArch64PBV(PTy, C)) 11646 return C.getTypeSize(PTy); 11647 } 11648 if (getAArch64PBV(QT, C)) 11649 return C.getTypeSize(QT); 11650 11651 return C.getTypeSize(C.getUIntPtrType()); 11652 } 11653 11654 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11655 // signature of the scalar function, as defined in 3.2.2 of the 11656 // AAVFABI. 11657 static std::tuple<unsigned, unsigned, bool> 11658 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11659 QualType RetType = FD->getReturnType().getCanonicalType(); 11660 11661 ASTContext &C = FD->getASTContext(); 11662 11663 bool OutputBecomesInput = false; 11664 11665 llvm::SmallVector<unsigned, 8> Sizes; 11666 if (!RetType->isVoidType()) { 11667 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11668 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11669 OutputBecomesInput = true; 11670 } 11671 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11672 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11673 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11674 } 11675 11676 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11677 // The LS of a function parameter / return value can only be a power 11678 // of 2, starting from 8 bits, up to 128. 11679 assert(llvm::all_of(Sizes, 11680 [](unsigned Size) { 11681 return Size == 8 || Size == 16 || Size == 32 || 11682 Size == 64 || Size == 128; 11683 }) && 11684 "Invalid size"); 11685 11686 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11687 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11688 OutputBecomesInput); 11689 } 11690 11691 // Function used to add the attribute. The parameter `VLEN` is 11692 // templated to allow the use of "x" when targeting scalable functions 11693 // for SVE. 11694 template <typename T> 11695 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11696 char ISA, StringRef ParSeq, 11697 StringRef MangledName, bool OutputBecomesInput, 11698 llvm::Function *Fn) { 11699 SmallString<256> Buffer; 11700 llvm::raw_svector_ostream Out(Buffer); 11701 Out << Prefix << ISA << LMask << VLEN; 11702 if (OutputBecomesInput) 11703 Out << "v"; 11704 Out << ParSeq << "_" << MangledName; 11705 Fn->addFnAttr(Out.str()); 11706 } 11707 11708 // Helper function to generate the Advanced SIMD names depending on 11709 // the value of the NDS when simdlen is not present. 
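// For example (illustrative): with NDS == 32 (say, a float-only signature)
// and 128-bit Advanced SIMD registers, both the 2-lane and the 4-lane
// variants are emitted, yielding names such as "_ZGVnN2v_foo" and
// "_ZGVnN4v_foo" for a hypothetical function "foo" with a single vector
// parameter.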
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
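      // E.g. (illustrative): 'simdlen(4)' on a hypothetical function "foo"
      // with a single vector parameter yields only the masked variant
      // "_ZGVsM4v_foo".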
11798 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11799 OutputBecomesInput, Fn); 11800 } else { 11801 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11802 // Advanced SIMD generates one or two functions, depending on 11803 // the `[not]inbranch` clause. 11804 switch (State) { 11805 case OMPDeclareSimdDeclAttr::BS_Undefined: 11806 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11809 OutputBecomesInput, Fn); 11810 break; 11811 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11812 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11813 OutputBecomesInput, Fn); 11814 break; 11815 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11816 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11817 OutputBecomesInput, Fn); 11818 break; 11819 } 11820 } 11821 } else { 11822 // If no user simdlen is provided, follow the AAVFABI rules for 11823 // generating the vector length. 11824 if (ISA == 's') { 11825 // SVE, section 3.4.1, item 1. 11826 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11827 OutputBecomesInput, Fn); 11828 } else { 11829 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11830 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11831 // two vector names depending on the use of the clause 11832 // `[not]inbranch`. 11833 switch (State) { 11834 case OMPDeclareSimdDeclAttr::BS_Undefined: 11835 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11836 OutputBecomesInput, Fn); 11837 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11838 OutputBecomesInput, Fn); 11839 break; 11840 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11841 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11842 OutputBecomesInput, Fn); 11843 break; 11844 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11845 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11846 OutputBecomesInput, Fn); 11847 break; 11848 } 11849 } 11850 } 11851 } 11852 11853 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11854 llvm::Function *Fn) { 11855 ASTContext &C = CGM.getContext(); 11856 FD = FD->getMostRecentDecl(); 11857 while (FD) { 11858 // Map params to their positions in function decl. 11859 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11860 if (isa<CXXMethodDecl>(FD)) 11861 ParamPositions.try_emplace(FD, 0); 11862 unsigned ParamPos = ParamPositions.size(); 11863 for (const ParmVarDecl *P : FD->parameters()) { 11864 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11865 ++ParamPos; 11866 } 11867 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11868 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11869 // Mark uniform parameters. 11870 for (const Expr *E : Attr->uniforms()) { 11871 E = E->IgnoreParenImpCasts(); 11872 unsigned Pos; 11873 if (isa<CXXThisExpr>(E)) { 11874 Pos = ParamPositions[FD]; 11875 } else { 11876 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11877 ->getCanonicalDecl(); 11878 auto It = ParamPositions.find(PVD); 11879 assert(It != ParamPositions.end() && "Function parameter not found"); 11880 Pos = It->second; 11881 } 11882 ParamAttrs[Pos].Kind = Uniform; 11883 } 11884 // Get alignment info. 
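      // E.g. (illustrative): 'aligned(p : 32)' records an alignment of 32
      // for 'p', which mangleVectorParameters() later emits as the suffix
      // "a32"; when no alignment expression is given, the target's default
      // OpenMP SIMD alignment for the type is used instead.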
11885 auto *NI = Attr->alignments_begin(); 11886 for (const Expr *E : Attr->aligneds()) { 11887 E = E->IgnoreParenImpCasts(); 11888 unsigned Pos; 11889 QualType ParmTy; 11890 if (isa<CXXThisExpr>(E)) { 11891 Pos = ParamPositions[FD]; 11892 ParmTy = E->getType(); 11893 } else { 11894 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11895 ->getCanonicalDecl(); 11896 auto It = ParamPositions.find(PVD); 11897 assert(It != ParamPositions.end() && "Function parameter not found"); 11898 Pos = It->second; 11899 ParmTy = PVD->getType(); 11900 } 11901 ParamAttrs[Pos].Alignment = 11902 (*NI) 11903 ? (*NI)->EvaluateKnownConstInt(C) 11904 : llvm::APSInt::getUnsigned( 11905 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11906 .getQuantity()); 11907 ++NI; 11908 } 11909 // Mark linear parameters. 11910 auto *SI = Attr->steps_begin(); 11911 auto *MI = Attr->modifiers_begin(); 11912 for (const Expr *E : Attr->linears()) { 11913 E = E->IgnoreParenImpCasts(); 11914 unsigned Pos; 11915 bool IsReferenceType = false; 11916 // Rescaling factor needed to compute the linear parameter 11917 // value in the mangled name. 11918 unsigned PtrRescalingFactor = 1; 11919 if (isa<CXXThisExpr>(E)) { 11920 Pos = ParamPositions[FD]; 11921 auto *P = cast<PointerType>(E->getType()); 11922 PtrRescalingFactor = CGM.getContext() 11923 .getTypeSizeInChars(P->getPointeeType()) 11924 .getQuantity(); 11925 } else { 11926 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11927 ->getCanonicalDecl(); 11928 auto It = ParamPositions.find(PVD); 11929 assert(It != ParamPositions.end() && "Function parameter not found"); 11930 Pos = It->second; 11931 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11932 PtrRescalingFactor = CGM.getContext() 11933 .getTypeSizeInChars(P->getPointeeType()) 11934 .getQuantity(); 11935 else if (PVD->getType()->isReferenceType()) { 11936 IsReferenceType = true; 11937 PtrRescalingFactor = 11938 CGM.getContext() 11939 .getTypeSizeInChars(PVD->getType().getNonReferenceType()) 11940 .getQuantity(); 11941 } 11942 } 11943 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11944 if (*MI == OMPC_LINEAR_ref) 11945 ParamAttr.Kind = LinearRef; 11946 else if (*MI == OMPC_LINEAR_uval) 11947 ParamAttr.Kind = LinearUVal; 11948 else if (IsReferenceType) 11949 ParamAttr.Kind = LinearVal; 11950 else 11951 ParamAttr.Kind = Linear; 11952 // Assuming a stride of 1, for `linear` without modifiers. 11953 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11954 if (*SI) { 11955 Expr::EvalResult Result; 11956 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11957 if (const auto *DRE = 11958 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11959 if (const auto *StridePVD = 11960 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 11961 ParamAttr.HasVarStride = true; 11962 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 11963 assert(It != ParamPositions.end() && 11964 "Function parameter not found"); 11965 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 11966 } 11967 } 11968 } else { 11969 ParamAttr.StrideOrArg = Result.Val.getInt(); 11970 } 11971 } 11972 // If we are using a linear clause on a pointer, we need to 11973 // rescale the value of linear_step with the byte size of the 11974 // pointee type. 
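        // E.g. (illustrative): 'linear(p : 2)' on a parameter 'double *p'
        // encodes a step of 2 * sizeof(double) == 16 in the vector name
        // (mangled as "l16").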
11975 if (!ParamAttr.HasVarStride && 11976 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 11977 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11978 ++SI; 11979 ++MI; 11980 } 11981 llvm::APSInt VLENVal; 11982 SourceLocation ExprLoc; 11983 const Expr *VLENExpr = Attr->getSimdlen(); 11984 if (VLENExpr) { 11985 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11986 ExprLoc = VLENExpr->getExprLoc(); 11987 } 11988 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11989 if (CGM.getTriple().isX86()) { 11990 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11991 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11992 unsigned VLEN = VLENVal.getExtValue(); 11993 StringRef MangledName = Fn->getName(); 11994 if (CGM.getTarget().hasFeature("sve")) 11995 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11996 MangledName, 's', 128, Fn, ExprLoc); 11997 if (CGM.getTarget().hasFeature("neon")) 11998 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11999 MangledName, 'n', 128, Fn, ExprLoc); 12000 } 12001 } 12002 FD = FD->getPreviousDecl(); 12003 } 12004 } 12005 12006 namespace { 12007 /// Cleanup action for doacross support. 12008 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12009 public: 12010 static const int DoacrossFinArgs = 2; 12011 12012 private: 12013 llvm::FunctionCallee RTLFn; 12014 llvm::Value *Args[DoacrossFinArgs]; 12015 12016 public: 12017 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12018 ArrayRef<llvm::Value *> CallArgs) 12019 : RTLFn(RTLFn) { 12020 assert(CallArgs.size() == DoacrossFinArgs); 12021 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12022 } 12023 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12024 if (!CGF.HaveInsertPoint()) 12025 return; 12026 CGF.EmitRuntimeCall(RTLFn, Args); 12027 } 12028 }; 12029 } // namespace 12030 12031 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12032 const OMPLoopDirective &D, 12033 ArrayRef<Expr *> NumIterations) { 12034 if (!CGF.HaveInsertPoint()) 12035 return; 12036 12037 ASTContext &C = CGM.getContext(); 12038 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12039 RecordDecl *RD; 12040 if (KmpDimTy.isNull()) { 12041 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12042 // kmp_int64 lo; // lower 12043 // kmp_int64 up; // upper 12044 // kmp_int64 st; // stride 12045 // }; 12046 RD = C.buildImplicitRecord("kmp_dim"); 12047 RD->startDefinition(); 12048 addFieldToRecordDecl(C, RD, Int64Ty); 12049 addFieldToRecordDecl(C, RD, Int64Ty); 12050 addFieldToRecordDecl(C, RD, Int64Ty); 12051 RD->completeDefinition(); 12052 KmpDimTy = C.getRecordType(RD); 12053 } else { 12054 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12055 } 12056 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12057 QualType ArrayTy = 12058 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12059 12060 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12061 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12062 enum { LowerFD = 0, UpperFD, StrideFD }; 12063 // Fill dims with data. 
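  // E.g. (illustrative) for '#pragma omp for ordered(2)', each of the two
  // kmp_dim entries below becomes { lo = 0, up = <num_iterations>, st = 1 },
  // with 'lo' left at zero by the null-initialization above.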
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy,
                                AllocateAlignment.getValue().getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
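    // Illustrative sketch of what the rest of this block emits (comments
    // only), assuming a scalar 'double x' under an allocate directive with
    // some allocator 'al':
    //   void *p = __kmpc_alloc(gtid, sizeof(double), al); // or __kmpc_aligned_alloc
    //   double *x.addr = (double *)p;  // uses of 'x' go through x.addr
    //   ...
    //   __kmpc_free(gtid, p, al);      // pushed as a cleanup below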
12218 if (!isAllocatableDecl(VD)) 12219 return UntiedAddr; 12220 llvm::Value *Size; 12221 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12222 if (CVD->getType()->isVariablyModifiedType()) { 12223 Size = CGF.getTypeSize(CVD->getType()); 12224 // Align the size: ((size + align - 1) / align) * align 12225 Size = CGF.Builder.CreateNUWAdd( 12226 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12227 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12228 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12229 } else { 12230 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12231 Size = CGM.getSize(Sz.alignTo(Align)); 12232 } 12233 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12234 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12235 const Expr *Allocator = AA->getAllocator(); 12236 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12237 llvm::Value *Alignment = getAlignmentValue(CGM, CVD); 12238 SmallVector<llvm::Value *, 4> Args; 12239 Args.push_back(ThreadID); 12240 if (Alignment) 12241 Args.push_back(Alignment); 12242 Args.push_back(Size); 12243 Args.push_back(AllocVal); 12244 llvm::omp::RuntimeFunction FnID = 12245 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12246 llvm::Value *Addr = CGF.EmitRuntimeCall( 12247 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12248 getName({CVD->getName(), ".void.addr"})); 12249 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12250 CGM.getModule(), OMPRTL___kmpc_free); 12251 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12252 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12253 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12254 if (UntiedAddr.isValid()) 12255 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12256 12257 // Cleanup action for allocate support. 12258 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12259 llvm::FunctionCallee RTLFn; 12260 SourceLocation::UIntTy LocEncoding; 12261 Address Addr; 12262 const Expr *AllocExpr; 12263 12264 public: 12265 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12266 SourceLocation::UIntTy LocEncoding, Address Addr, 12267 const Expr *AllocExpr) 12268 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12269 AllocExpr(AllocExpr) {} 12270 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12271 if (!CGF.HaveInsertPoint()) 12272 return; 12273 llvm::Value *Args[3]; 12274 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12275 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12276 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12277 Addr.getPointer(), CGF.VoidPtrTy); 12278 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12279 Args[2] = AllocVal; 12280 CGF.EmitRuntimeCall(RTLFn, Args); 12281 } 12282 }; 12283 Address VDAddr = 12284 UntiedRealAddr.isValid() 12285 ? 
UntiedRealAddr 12286 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); 12287 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12288 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12289 VDAddr, Allocator); 12290 if (UntiedRealAddr.isValid()) 12291 if (auto *Region = 12292 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12293 Region->emitUntiedSwitch(CGF); 12294 return VDAddr; 12295 } 12296 return UntiedAddr; 12297 } 12298 12299 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12300 const VarDecl *VD) const { 12301 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12302 if (It == FunctionToUntiedTaskStackMap.end()) 12303 return false; 12304 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12305 } 12306 12307 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12308 CodeGenModule &CGM, const OMPLoopDirective &S) 12309 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12310 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12311 if (!NeedToPush) 12312 return; 12313 NontemporalDeclsSet &DS = 12314 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12315 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12316 for (const Stmt *Ref : C->private_refs()) { 12317 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12318 const ValueDecl *VD; 12319 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12320 VD = DRE->getDecl(); 12321 } else { 12322 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12323 assert((ME->isImplicitCXXThis() || 12324 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12325 "Expected member of current class."); 12326 VD = ME->getMemberDecl(); 12327 } 12328 DS.insert(VD); 12329 } 12330 } 12331 } 12332 12333 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12334 if (!NeedToPush) 12335 return; 12336 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12337 } 12338 12339 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12340 CodeGenFunction &CGF, 12341 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12342 std::pair<Address, Address>> &LocalVars) 12343 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12344 if (!NeedToPush) 12345 return; 12346 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12347 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12348 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12349 } 12350 12351 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12352 if (!NeedToPush) 12353 return; 12354 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12355 } 12356 12357 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12358 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12359 12360 return llvm::any_of( 12361 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12362 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12363 } 12364 12365 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12366 const OMPExecutableDirective &S, 12367 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12368 const { 12369 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12370 // Vars in target/task regions must be excluded completely. 
12371 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12372 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12373 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12374 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12375 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12376 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12377 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12378 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12379 } 12380 } 12381 // Exclude vars in private clauses. 12382 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12383 for (const Expr *Ref : C->varlists()) { 12384 if (!Ref->getType()->isScalarType()) 12385 continue; 12386 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12387 if (!DRE) 12388 continue; 12389 NeedToCheckForLPCs.insert(DRE->getDecl()); 12390 } 12391 } 12392 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12393 for (const Expr *Ref : C->varlists()) { 12394 if (!Ref->getType()->isScalarType()) 12395 continue; 12396 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12397 if (!DRE) 12398 continue; 12399 NeedToCheckForLPCs.insert(DRE->getDecl()); 12400 } 12401 } 12402 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12403 for (const Expr *Ref : C->varlists()) { 12404 if (!Ref->getType()->isScalarType()) 12405 continue; 12406 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12407 if (!DRE) 12408 continue; 12409 NeedToCheckForLPCs.insert(DRE->getDecl()); 12410 } 12411 } 12412 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12413 for (const Expr *Ref : C->varlists()) { 12414 if (!Ref->getType()->isScalarType()) 12415 continue; 12416 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12417 if (!DRE) 12418 continue; 12419 NeedToCheckForLPCs.insert(DRE->getDecl()); 12420 } 12421 } 12422 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12423 for (const Expr *Ref : C->varlists()) { 12424 if (!Ref->getType()->isScalarType()) 12425 continue; 12426 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12427 if (!DRE) 12428 continue; 12429 NeedToCheckForLPCs.insert(DRE->getDecl()); 12430 } 12431 } 12432 for (const Decl *VD : NeedToCheckForLPCs) { 12433 for (const LastprivateConditionalData &Data : 12434 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12435 if (Data.DeclToUniqueName.count(VD) > 0) { 12436 if (!Data.Disabled) 12437 NeedToAddForLPCsAsDisabled.insert(VD); 12438 break; 12439 } 12440 } 12441 } 12442 } 12443 12444 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12445 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12446 : CGM(CGF.CGM), 12447 Action((CGM.getLangOpts().OpenMP >= 50 && 12448 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12449 [](const OMPLastprivateClause *C) { 12450 return C->getKind() == 12451 OMPC_LASTPRIVATE_conditional; 12452 })) 12453 ? 
          ActionToDo::PushAsLastprivateConditional
          : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
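    // Conceptually (illustrative), the implicit record built above is:
    //   struct lastprivate.conditional { <VD's type> Value; char Fired; };
    // 'Fired' is set when the variable is assigned in the region and is
    // checked later to decide whether the lastprivate update must happen.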
NewType = C.getRecordType(RD); 12535 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12536 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12537 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12538 } else { 12539 NewType = std::get<0>(VI->getSecond()); 12540 VDField = std::get<1>(VI->getSecond()); 12541 FiredField = std::get<2>(VI->getSecond()); 12542 BaseLVal = std::get<3>(VI->getSecond()); 12543 } 12544 LValue FiredLVal = 12545 CGF.EmitLValueForField(BaseLVal, FiredField); 12546 CGF.EmitStoreOfScalar( 12547 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12548 FiredLVal); 12549 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12550 } 12551 12552 namespace { 12553 /// Checks if the lastprivate conditional variable is referenced in LHS. 12554 class LastprivateConditionalRefChecker final 12555 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12556 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12557 const Expr *FoundE = nullptr; 12558 const Decl *FoundD = nullptr; 12559 StringRef UniqueDeclName; 12560 LValue IVLVal; 12561 llvm::Function *FoundFn = nullptr; 12562 SourceLocation Loc; 12563 12564 public: 12565 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12566 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12567 llvm::reverse(LPM)) { 12568 auto It = D.DeclToUniqueName.find(E->getDecl()); 12569 if (It == D.DeclToUniqueName.end()) 12570 continue; 12571 if (D.Disabled) 12572 return false; 12573 FoundE = E; 12574 FoundD = E->getDecl()->getCanonicalDecl(); 12575 UniqueDeclName = It->second; 12576 IVLVal = D.IVLVal; 12577 FoundFn = D.Fn; 12578 break; 12579 } 12580 return FoundE == E; 12581 } 12582 bool VisitMemberExpr(const MemberExpr *E) { 12583 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12584 return false; 12585 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12586 llvm::reverse(LPM)) { 12587 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12588 if (It == D.DeclToUniqueName.end()) 12589 continue; 12590 if (D.Disabled) 12591 return false; 12592 FoundE = E; 12593 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12594 UniqueDeclName = It->second; 12595 IVLVal = D.IVLVal; 12596 FoundFn = D.Fn; 12597 break; 12598 } 12599 return FoundE == E; 12600 } 12601 bool VisitStmt(const Stmt *S) { 12602 for (const Stmt *Child : S->children()) { 12603 if (!Child) 12604 continue; 12605 if (const auto *E = dyn_cast<Expr>(Child)) 12606 if (!E->isGLValue()) 12607 continue; 12608 if (Visit(Child)) 12609 return true; 12610 } 12611 return false; 12612 } 12613 explicit LastprivateConditionalRefChecker( 12614 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12615 : LPM(LPM) {} 12616 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12617 getFoundData() const { 12618 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12619 } 12620 }; 12621 } // namespace 12622 12623 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12624 LValue IVLVal, 12625 StringRef UniqueDeclName, 12626 LValue LVal, 12627 SourceLocation Loc) { 12628 // Last updated loop counter for the lastprivate conditional var. 
12629 // int<xx> last_iv = 0; 12630 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12631 llvm::Constant *LastIV = 12632 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12633 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12634 IVLVal.getAlignment().getAsAlign()); 12635 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12636 12637 // Last value of the lastprivate conditional. 12638 // decltype(priv_a) last_a; 12639 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12640 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12641 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12642 LValue LastLVal = CGF.MakeAddrLValue( 12643 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12644 12645 // Global loop counter. Required to handle inner parallel-for regions. 12646 // iv 12647 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12648 12649 // #pragma omp critical(a) 12650 // if (last_iv <= iv) { 12651 // last_iv = iv; 12652 // last_a = priv_a; 12653 // } 12654 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12655 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12656 Action.Enter(CGF); 12657 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12658 // (last_iv <= iv) ? Check if the variable is updated and store new 12659 // value in global var. 12660 llvm::Value *CmpRes; 12661 if (IVLVal.getType()->isSignedIntegerType()) { 12662 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12663 } else { 12664 assert(IVLVal.getType()->isUnsignedIntegerType() && 12665 "Loop iteration variable must be integer."); 12666 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12667 } 12668 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12669 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12670 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12671 // { 12672 CGF.EmitBlock(ThenBB); 12673 12674 // last_iv = iv; 12675 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12676 12677 // last_a = priv_a; 12678 switch (CGF.getEvaluationKind(LVal.getType())) { 12679 case TEK_Scalar: { 12680 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12681 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12682 break; 12683 } 12684 case TEK_Complex: { 12685 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12686 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12687 break; 12688 } 12689 case TEK_Aggregate: 12690 llvm_unreachable( 12691 "Aggregates are not supported in lastprivate conditional."); 12692 } 12693 // } 12694 CGF.EmitBranch(ExitBB); 12695 // There is no need to emit line number for unconditional branch. 12696 (void)ApplyDebugLocation::CreateEmpty(CGF); 12697 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12698 }; 12699 12700 if (CGM.getLangOpts().OpenMPSimd) { 12701 // Do not emit as a critical region as no parallel region could be emitted. 
12702 RegionCodeGenTy ThenRCG(CodeGen); 12703 ThenRCG(CGF); 12704 } else { 12705 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12706 } 12707 } 12708 12709 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12710 const Expr *LHS) { 12711 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12712 return; 12713 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12714 if (!Checker.Visit(LHS)) 12715 return; 12716 const Expr *FoundE; 12717 const Decl *FoundD; 12718 StringRef UniqueDeclName; 12719 LValue IVLVal; 12720 llvm::Function *FoundFn; 12721 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12722 Checker.getFoundData(); 12723 if (FoundFn != CGF.CurFn) { 12724 // Special codegen for inner parallel regions. 12725 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12726 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12727 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12728 "Lastprivate conditional is not found in outer region."); 12729 QualType StructTy = std::get<0>(It->getSecond()); 12730 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12731 LValue PrivLVal = CGF.EmitLValue(FoundE); 12732 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12733 PrivLVal.getAddress(CGF), 12734 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12735 CGF.ConvertTypeForMem(StructTy)); 12736 LValue BaseLVal = 12737 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12738 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12739 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12740 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12741 FiredLVal, llvm::AtomicOrdering::Unordered, 12742 /*IsVolatile=*/true, /*isInit=*/false); 12743 return; 12744 } 12745 12746 // Private address of the lastprivate conditional in the current context. 

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
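
// Taken together, the helpers above amount to the following conceptual
// lowering (a sketch using the placeholder names from the comments; the
// critical section is named after the unique declaration name):
//
//   decltype(a) last_a;   // internal global, created only if a store fired
//   int<xx> last_iv = 0;  // internal global tracking the winning IV
//   ...
//   priv_a = ...;         // user assignment in the loop body
//   #pragma omp critical(<unique name>)
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
//   ...
//   if (<last_a was created>) // final update on region exit
//     a = last_a;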

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
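
// Note on the override above: in SIMD-only mode (-fopenmp-simd) no parallel
// regions are materialized, so a reduction never needs cross-thread
// combination and the caller is expected to request the base class's serial
// "simple reduction" path. For example (sketch; `sum` and `v` are
// hypothetical):
//
//   #pragma omp parallel for simd reduction(+ : sum)
//   for (int i = 0; i < N; ++i)
//     sum += v[i];
//
// is emitted as a plain loop over a private copy of 'sum' that is combined
// back into the original variable, which is why the assert requires
// Options.SimpleReduction.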

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; }

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}