//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
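        // The body of an untied task is split into parts at task scheduling
        // points. Load the current part id and switch on it; every
        // emitUntiedSwitch() call below registers one more case that resumes
        // execution at the next part.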
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
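    // Install a CGOpenMPInlinedRegionInfo that chains to the previous
    // CapturedStmtInfo; the destructor restores it. If NoInheritance is set,
    // also stash the lambda/block capture state so the inlined region does
    // not see the enclosing captures.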
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
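/// Clang encodes the psource field as ";file;function;line;column;;" (see
/// getIdentStringFromSourceLocation() below).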
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
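// Note: when a schedule modifier is present, it is ORed into the schedule
// type passed to the runtime, e.g. nonmonotonic dynamic is encoded as
// (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic).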
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize with the user-defined
/// reduction initializer of \p DRD; otherwise emit \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction construct used for reduction, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
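  // Such a global is erased only if it is still a declaration and has no
  // remaining uses; anything defined or still referenced is kept.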
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
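// For example, assuming the host defaults where both separators are ".",
// getName({"omp_combiner", ""}) yields the helper name ".omp_combiner."
// used below; device runtimes may construct names with other separators.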
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at
    // IP, use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
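    // FiniCB emits the cleanup branch at the point the OpenMPIRBuilder asks
    // to finalize: it redirects control through Clang's cleanup machinery to
    // the cancellation destination of the enclosing parallel region.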
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  const auto *DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
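/// Erase the dummy "svcpt" instruction created by setLocThreadIdInsertPt(),
/// if one exists; per-function helper calls such as
/// __kmpc_global_thread_num are emitted at this marker so that they precede
/// all of their uses.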
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
1483 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1484 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1485 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1486 !CGF.getLangOpts().CXXExceptions ||
1487 CGF.Builder.GetInsertBlock() == TopBlock ||
1488 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1489 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1490 TopBlock ||
1491 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1492 CGF.Builder.GetInsertBlock()) {
1493 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1494 // If the value was loaded in the entry block, cache it and use it
1495 // everywhere in the function.
1496 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1498 Elem.second.ThreadID = ThreadID;
1499 }
1500 return ThreadID;
1501 }
1502 }
1503 }
1504
1505 // This is not an outlined function region - need to call kmp_int32
1506 // __kmpc_global_thread_num(ident_t *loc).
1507 // Generate the thread id value and cache it for use across the
1508 // function.
1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1510 if (!Elem.second.ServiceInsertPt)
1511 setLocThreadIdInsertPt(CGF);
1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1514 llvm::CallInst *Call = CGF.Builder.CreateCall(
1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1516 OMPRTL___kmpc_global_thread_num),
1517 emitUpdateLocation(CGF, Loc));
1518 Call->setCallingConv(CGF.getRuntimeCC());
1519 Elem.second.ThreadID = Call;
1520 return Call;
1521 }
1522
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526 clearLocThreadIdInsertPt(CGF);
1527 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528 }
1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1531 UDRMap.erase(D);
1532 FunctionUDRMap.erase(CGF.CurFn);
1533 }
1534 auto I = FunctionUDMMap.find(CGF.CurFn);
1535 if (I != FunctionUDMMap.end()) {
1536 for (const auto *D : I->second)
1537 UDMMap.erase(D);
1538 FunctionUDMMap.erase(I);
1539 }
1540 LastprivateConditionalToTypes.erase(CGF.CurFn);
1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543
1544 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1545 return OMPBuilder.IdentPtr;
1546 }
1547
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549 if (!Kmpc_MicroTy) {
1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554 }
1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1560 bool IsGPUDistribute) {
1561 assert((IVSize == 32 || IVSize == 64) &&
1562 "IV size is not compatible with the omp runtime");
1563 StringRef Name;
1564 if (IsGPUDistribute)
1565 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1566 : "__kmpc_distribute_static_init_4u")
1567 : (IVSigned ? "__kmpc_distribute_static_init_8"
1568 : "__kmpc_distribute_static_init_8u");
1569 else
1570 Name = IVSize == 32 ? (IVSigned ?
"__kmpc_for_static_init_4" 1571 : "__kmpc_for_static_init_4u") 1572 : (IVSigned ? "__kmpc_for_static_init_8" 1573 : "__kmpc_for_static_init_8u"); 1574 1575 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1576 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1577 llvm::Type *TypeParams[] = { 1578 getIdentTyPointerTy(), // loc 1579 CGM.Int32Ty, // tid 1580 CGM.Int32Ty, // schedtype 1581 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1582 PtrTy, // p_lower 1583 PtrTy, // p_upper 1584 PtrTy, // p_stride 1585 ITy, // incr 1586 ITy // chunk 1587 }; 1588 auto *FnTy = 1589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1590 return CGM.CreateRuntimeFunction(FnTy, Name); 1591 } 1592 1593 llvm::FunctionCallee 1594 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1595 assert((IVSize == 32 || IVSize == 64) && 1596 "IV size is not compatible with the omp runtime"); 1597 StringRef Name = 1598 IVSize == 32 1599 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1600 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1601 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1602 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1603 CGM.Int32Ty, // tid 1604 CGM.Int32Ty, // schedtype 1605 ITy, // lower 1606 ITy, // upper 1607 ITy, // stride 1608 ITy // chunk 1609 }; 1610 auto *FnTy = 1611 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1612 return CGM.CreateRuntimeFunction(FnTy, Name); 1613 } 1614 1615 llvm::FunctionCallee 1616 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1617 assert((IVSize == 32 || IVSize == 64) && 1618 "IV size is not compatible with the omp runtime"); 1619 StringRef Name = 1620 IVSize == 32 1621 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1622 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1623 llvm::Type *TypeParams[] = { 1624 getIdentTyPointerTy(), // loc 1625 CGM.Int32Ty, // tid 1626 }; 1627 auto *FnTy = 1628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1629 return CGM.CreateRuntimeFunction(FnTy, Name); 1630 } 1631 1632 llvm::FunctionCallee 1633 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1634 assert((IVSize == 32 || IVSize == 64) && 1635 "IV size is not compatible with the omp runtime"); 1636 StringRef Name = 1637 IVSize == 32 1638 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1639 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1640 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1641 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1642 llvm::Type *TypeParams[] = { 1643 getIdentTyPointerTy(), // loc 1644 CGM.Int32Ty, // tid 1645 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1646 PtrTy, // p_lower 1647 PtrTy, // p_upper 1648 PtrTy // p_stride 1649 }; 1650 auto *FnTy = 1651 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1652 return CGM.CreateRuntimeFunction(FnTy, Name); 1653 } 1654 1655 /// Obtain information that uniquely identifies a target entry. This 1656 /// consists of the file and device IDs as well as line number associated with 1657 /// the relevant entry source location. 
1658 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1659 unsigned &DeviceID, unsigned &FileID, 1660 unsigned &LineNum) { 1661 SourceManager &SM = C.getSourceManager(); 1662 1663 // The loc should be always valid and have a file ID (the user cannot use 1664 // #pragma directives in macros) 1665 1666 assert(Loc.isValid() && "Source location is expected to be always valid."); 1667 1668 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1669 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1670 1671 llvm::sys::fs::UniqueID ID; 1672 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1673 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1674 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1675 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1676 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1677 << PLoc.getFilename() << EC.message(); 1678 } 1679 1680 DeviceID = ID.getDevice(); 1681 FileID = ID.getFile(); 1682 LineNum = PLoc.getLine(); 1683 } 1684 1685 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1686 if (CGM.getLangOpts().OpenMPSimd) 1687 return Address::invalid(); 1688 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1689 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1690 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1691 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1692 HasRequiresUnifiedSharedMemory))) { 1693 SmallString<64> PtrName; 1694 { 1695 llvm::raw_svector_ostream OS(PtrName); 1696 OS << CGM.getMangledName(GlobalDecl(VD)); 1697 if (!VD->isExternallyVisible()) { 1698 unsigned DeviceID, FileID, Line; 1699 getTargetEntryUniqueInfo(CGM.getContext(), 1700 VD->getCanonicalDecl()->getBeginLoc(), 1701 DeviceID, FileID, Line); 1702 OS << llvm::format("_%x", FileID); 1703 } 1704 OS << "_decl_tgt_ref_ptr"; 1705 } 1706 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1707 if (!Ptr) { 1708 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1709 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1710 PtrName); 1711 1712 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1713 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1714 1715 if (!CGM.getLangOpts().OpenMPIsDevice) 1716 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1717 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1718 } 1719 return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD)); 1720 } 1721 return Address::invalid(); 1722 } 1723 1724 llvm::Constant * 1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1726 assert(!CGM.getLangOpts().OpenMPUseTLS || 1727 !CGM.getContext().getTargetInfo().isTLSSupported()); 1728 // Lookup the entry, lazily creating it if necessary. 
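// The cache global is keyed by the variable's mangled name plus a "cache"
// suffix, so every use of one threadprivate variable shares a single cache
// that is later handed to __kmpc_threadprivate_cached.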
1729 std::string Suffix = getName({"cache", ""}); 1730 return getOrCreateInternalVariable( 1731 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1732 } 1733 1734 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1735 const VarDecl *VD, 1736 Address VDAddr, 1737 SourceLocation Loc) { 1738 if (CGM.getLangOpts().OpenMPUseTLS && 1739 CGM.getContext().getTargetInfo().isTLSSupported()) 1740 return VDAddr; 1741 1742 llvm::Type *VarTy = VDAddr.getElementType(); 1743 llvm::Value *Args[] = { 1744 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1745 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1746 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1747 getOrCreateThreadPrivateCache(VD)}; 1748 return Address::deprecated( 1749 CGF.EmitRuntimeCall( 1750 OMPBuilder.getOrCreateRuntimeFunction( 1751 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1752 Args), 1753 VDAddr.getAlignment()); 1754 } 1755 1756 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1757 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1758 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1759 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1760 // library. 1761 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1762 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1763 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1764 OMPLoc); 1765 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1766 // to register constructor/destructor for variable. 1767 llvm::Value *Args[] = { 1768 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1769 Ctor, CopyCtor, Dtor}; 1770 CGF.EmitRuntimeCall( 1771 OMPBuilder.getOrCreateRuntimeFunction( 1772 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1773 Args); 1774 } 1775 1776 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1777 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1778 bool PerformInit, CodeGenFunction *CGF) { 1779 if (CGM.getLangOpts().OpenMPUseTLS && 1780 CGM.getContext().getTargetInfo().isTLSSupported()) 1781 return nullptr; 1782 1783 VD = VD->getDefinition(CGM.getContext()); 1784 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1785 QualType ASTTy = VD->getType(); 1786 1787 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1788 const Expr *Init = VD->getAnyInitializer(); 1789 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1790 // Generate function that re-emits the declaration's initializer into the 1791 // threadprivate copy of the variable VD 1792 CodeGenFunction CtorCGF(CGM); 1793 FunctionArgList Args; 1794 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1795 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1796 ImplicitParamDecl::Other); 1797 Args.push_back(&Dst); 1798 1799 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1800 CGM.getContext().VoidPtrTy, Args); 1801 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1802 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1803 llvm::Function *Fn = 1804 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1805 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1806 Args, Loc, Loc); 1807 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1808 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1809 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1810 Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment()); 1811 Arg = 
CtorCGF.Builder.CreateElementBitCast(
1812 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1813 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1814 /*IsInitializer=*/true);
1815 ArgVal = CtorCGF.EmitLoadOfScalar(
1816 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1817 CGM.getContext().VoidPtrTy, Dst.getLocation());
1818 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1819 CtorCGF.FinishFunction();
1820 Ctor = Fn;
1821 }
1822 if (VD->getType().isDestructedType() != QualType::DK_none) {
1823 // Generate a function that emits the destructor call for the threadprivate
1824 // copy of the variable VD.
1825 CodeGenFunction DtorCGF(CGM);
1826 FunctionArgList Args;
1827 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1828 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1829 ImplicitParamDecl::Other);
1830 Args.push_back(&Dst);
1831
1832 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1833 CGM.getContext().VoidTy, Args);
1834 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1835 std::string Name = getName({"__kmpc_global_dtor_", ""});
1836 llvm::Function *Fn =
1837 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1838 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1839 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1840 Loc, Loc);
1841 // Create a scope with an artificial location for the body of this function.
1842 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1843 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1844 DtorCGF.GetAddrOfLocalVar(&Dst),
1845 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1846 DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
1847 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1848 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1849 DtorCGF.FinishFunction();
1850 Dtor = Fn;
1851 }
1852 // Do not emit the init function if it is not required.
1853 if (!Ctor && !Dtor)
1854 return nullptr;
1855
1856 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1857 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1858 /*isVarArg=*/false)
1859 ->getPointerTo();
1860 // Copying constructor for the threadprivate variable.
1861 // Must be NULL - reserved by the runtime, which currently requires that this
1862 // parameter always be NULL. Otherwise it fires an assertion.
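// Below, any hook that was not generated is replaced by a null constant of
// the matching function-pointer type, so __kmpc_threadprivate_register always
// receives well-typed ctor/cctor/dtor arguments.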
1863 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1864 if (Ctor == nullptr) { 1865 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1866 /*isVarArg=*/false) 1867 ->getPointerTo(); 1868 Ctor = llvm::Constant::getNullValue(CtorTy); 1869 } 1870 if (Dtor == nullptr) { 1871 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1872 /*isVarArg=*/false) 1873 ->getPointerTo(); 1874 Dtor = llvm::Constant::getNullValue(DtorTy); 1875 } 1876 if (!CGF) { 1877 auto *InitFunctionTy = 1878 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1879 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1880 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1881 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1882 CodeGenFunction InitCGF(CGM); 1883 FunctionArgList ArgList; 1884 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1885 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1886 Loc, Loc); 1887 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1888 InitCGF.FinishFunction(); 1889 return InitFunction; 1890 } 1891 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1892 } 1893 return nullptr; 1894 } 1895 1896 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1897 llvm::GlobalVariable *Addr, 1898 bool PerformInit) { 1899 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1900 !CGM.getLangOpts().OpenMPIsDevice) 1901 return false; 1902 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1903 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1904 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1905 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1906 HasRequiresUnifiedSharedMemory)) 1907 return CGM.getLangOpts().OpenMPIsDevice; 1908 VD = VD->getDefinition(CGM.getContext()); 1909 assert(VD && "Unknown VarDecl"); 1910 1911 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1912 return CGM.getLangOpts().OpenMPIsDevice; 1913 1914 QualType ASTTy = VD->getType(); 1915 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1916 1917 // Produce the unique prefix to identify the new target regions. We use 1918 // the source location of the variable declaration which we know to not 1919 // conflict with any target region. 
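// The prefix has roughly the shape
// __omp_offloading_<device-id-hex>_<file-id-hex>_<name>_l<line>, e.g.
// something like __omp_offloading_803_1a7f3_MyGlobal_l12 (illustrative
// values only).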
1920 unsigned DeviceID;
1921 unsigned FileID;
1922 unsigned Line;
1923 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1924 SmallString<128> Buffer, Out;
1925 {
1926 llvm::raw_svector_ostream OS(Buffer);
1927 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1928 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1929 }
1930
1931 const Expr *Init = VD->getAnyInitializer();
1932 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1933 llvm::Constant *Ctor;
1934 llvm::Constant *ID;
1935 if (CGM.getLangOpts().OpenMPIsDevice) {
1936 // Generate a function that re-emits the declaration's initializer into
1937 // the device copy of the variable VD.
1938 CodeGenFunction CtorCGF(CGM);
1939
1940 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1941 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1942 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1943 FTy, Twine(Buffer, "_ctor"), FI, Loc);
1944 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1945 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1946 FunctionArgList(), Loc, Loc);
1947 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1948 CtorCGF.EmitAnyExprToMem(
1949 Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
1950 Init->getType().getQualifiers(),
1951 /*IsInitializer=*/true);
1952 CtorCGF.FinishFunction();
1953 Ctor = Fn;
1954 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1955 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1956 } else {
1957 Ctor = new llvm::GlobalVariable(
1958 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1959 llvm::GlobalValue::PrivateLinkage,
1960 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1961 ID = Ctor;
1962 }
1963
1964 // Register the information for the entry associated with the constructor.
1965 Out.clear();
1966 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1967 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1968 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1969 }
1970 if (VD->getType().isDestructedType() != QualType::DK_none) {
1971 llvm::Constant *Dtor;
1972 llvm::Constant *ID;
1973 if (CGM.getLangOpts().OpenMPIsDevice) {
1974 // Generate a function that emits the destructor call for the device
1975 // copy of the variable VD.
1976 CodeGenFunction DtorCGF(CGM);
1977
1978 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1979 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1980 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1981 FTy, Twine(Buffer, "_dtor"), FI, Loc);
1982 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1983 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1984 FunctionArgList(), Loc, Loc);
1985 // Create a scope with an artificial location for the body of this
1986 // function.
1987 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1988 DtorCGF.emitDestroy( 1989 Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy, 1990 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1991 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1992 DtorCGF.FinishFunction(); 1993 Dtor = Fn; 1994 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1995 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1996 } else { 1997 Dtor = new llvm::GlobalVariable( 1998 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1999 llvm::GlobalValue::PrivateLinkage, 2000 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2001 ID = Dtor; 2002 } 2003 // Register the information for the entry associated with the destructor. 2004 Out.clear(); 2005 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2006 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2007 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2008 } 2009 return CGM.getLangOpts().OpenMPIsDevice; 2010 } 2011 2012 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2013 QualType VarType, 2014 StringRef Name) { 2015 std::string Suffix = getName({"artificial", ""}); 2016 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2017 llvm::GlobalVariable *GAddr = 2018 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2019 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2020 CGM.getTarget().isTLSSupported()) { 2021 GAddr->setThreadLocal(/*Val=*/true); 2022 return Address(GAddr, GAddr->getValueType(), 2023 CGM.getContext().getTypeAlignInChars(VarType)); 2024 } 2025 std::string CacheSuffix = getName({"cache", ""}); 2026 llvm::Value *Args[] = { 2027 emitUpdateLocation(CGF, SourceLocation()), 2028 getThreadID(CGF, SourceLocation()), 2029 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2030 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2031 /*isSigned=*/false), 2032 getOrCreateInternalVariable( 2033 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2034 return Address( 2035 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2036 CGF.EmitRuntimeCall( 2037 OMPBuilder.getOrCreateRuntimeFunction( 2038 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2039 Args), 2040 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2041 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2042 } 2043 2044 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2045 const RegionCodeGenTy &ThenGen, 2046 const RegionCodeGenTy &ElseGen) { 2047 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2048 2049 // If the condition constant folds and can be elided, try to avoid emitting 2050 // the condition and the dead arm of the if/else. 2051 bool CondConstant; 2052 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2053 if (CondConstant) 2054 ThenGen(CGF); 2055 else 2056 ElseGen(CGF); 2057 return; 2058 } 2059 2060 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2061 // emit the conditional branch. 2062 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2063 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2064 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2065 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2066 2067 // Emit the 'then' code. 
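// The emitted control flow is the usual diamond (a sketch; the block names
// match the createBasicBlock calls above):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then: <ThenGen>  br label %omp_if.end
//   omp_if.else: <ElseGen>  br label %omp_if.end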
2068 CGF.EmitBlock(ThenBlock);
2069 ThenGen(CGF);
2070 CGF.EmitBranch(ContBlock);
2071 // Emit the 'else' code if present.
2072 // There is no need to emit a line number for an unconditional branch.
2073 (void)ApplyDebugLocation::CreateEmpty(CGF);
2074 CGF.EmitBlock(ElseBlock);
2075 ElseGen(CGF);
2076 // There is no need to emit a line number for an unconditional branch.
2077 (void)ApplyDebugLocation::CreateEmpty(CGF);
2078 CGF.EmitBranch(ContBlock);
2079 // Emit the continuation block for code after the if.
2080 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2081 }
2082
2083 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2084 llvm::Function *OutlinedFn,
2085 ArrayRef<llvm::Value *> CapturedVars,
2086 const Expr *IfCond,
2087 llvm::Value *NumThreads) {
2088 if (!CGF.HaveInsertPoint())
2089 return;
2090 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2091 auto &M = CGM.getModule();
2092 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2093 this](CodeGenFunction &CGF, PrePostActionTy &) {
2094 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2095 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2096 llvm::Value *Args[] = {
2097 RTLoc,
2098 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2099 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2100 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2101 RealArgs.append(std::begin(Args), std::end(Args));
2102 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2103
2104 llvm::FunctionCallee RTLFn =
2105 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2106 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2107 };
2108 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2109 this](CodeGenFunction &CGF, PrePostActionTy &) {
2110 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2111 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2112 // Build calls:
2113 // __kmpc_serialized_parallel(&Loc, GTid);
2114 llvm::Value *Args[] = {RTLoc, ThreadID};
2115 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2116 M, OMPRTL___kmpc_serialized_parallel),
2117 Args);
2118
2119 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2120 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2121 Address ZeroAddrBound =
2122 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2123 /*Name=*/".bound.zero.addr");
2124 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2125 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2126 // ThreadId for serialized parallels is 0.
2127 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2128 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2129 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2130
2131 // Ensure we do not inline the function. This is trivially true for the ones
2132 // passed to __kmpc_fork_call but the ones called in serialized regions
2133 // could be inlined. This is not perfect, but it is closer to the invariant
2134 // we want, namely, every data environment starts with a new function.
2135 // TODO: We should pass the if condition to the runtime function and do the
2136 // handling there. Much cleaner code.
2137 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2138 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2139 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2140
2141 // __kmpc_end_serialized_parallel(&Loc, GTid);
2142 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2143 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2144 M, OMPRTL___kmpc_end_serialized_parallel),
2145 EndArgs);
2146 };
2147 if (IfCond) {
2148 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2149 } else {
2150 RegionCodeGenTy ThenRCG(ThenGen);
2151 ThenRCG(CGF);
2152 }
2153 }
2154
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed as the first argument of the outlined
2157 // function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2158 // region but in a regular serial code region, get the thread ID by calling
2159 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2160 // temporary and return the address of that temporary.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162 SourceLocation Loc) {
2163 if (auto *OMPRegionInfo =
2164 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165 if (OMPRegionInfo->getThreadIDVariable())
2166 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167
2168 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169 QualType Int32Ty =
2170 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172 CGF.EmitStoreOfScalar(ThreadID,
2173 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174
2175 return ThreadIDTemp;
2176 }
2177
2178 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2179 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180 SmallString<256> Buffer;
2181 llvm::raw_svector_ostream Out(Buffer);
2182 Out << Name;
2183 StringRef RuntimeName = Out.str();
2184 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185 if (Elem.second) {
2186 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2187 "OMP internal variable has different type than requested");
2188 return &*Elem.second;
2189 }
2190
2191 return Elem.second = new llvm::GlobalVariable(
2192 CGM.getModule(), Ty, /*IsConstant*/ false,
2193 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194 Elem.first(), /*InsertBefore=*/nullptr,
2195 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200 std::string Name = getName({Prefix, "var"});
2201 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
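/// Wraps a region between an entry runtime call and an exit runtime call;
/// when Conditional is set, the region body is only emitted on the branch
/// taken when the entry call returns non-zero (used for master, masked and
/// single below).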
2206 class CommonActionTy final : public PrePostActionTy { 2207 llvm::FunctionCallee EnterCallee; 2208 ArrayRef<llvm::Value *> EnterArgs; 2209 llvm::FunctionCallee ExitCallee; 2210 ArrayRef<llvm::Value *> ExitArgs; 2211 bool Conditional; 2212 llvm::BasicBlock *ContBlock = nullptr; 2213 2214 public: 2215 CommonActionTy(llvm::FunctionCallee EnterCallee, 2216 ArrayRef<llvm::Value *> EnterArgs, 2217 llvm::FunctionCallee ExitCallee, 2218 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2219 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2220 ExitArgs(ExitArgs), Conditional(Conditional) {} 2221 void Enter(CodeGenFunction &CGF) override { 2222 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2223 if (Conditional) { 2224 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2225 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2226 ContBlock = CGF.createBasicBlock("omp_if.end"); 2227 // Generate the branch (If-stmt) 2228 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2229 CGF.EmitBlock(ThenBlock); 2230 } 2231 } 2232 void Done(CodeGenFunction &CGF) { 2233 // Emit the rest of blocks/branches 2234 CGF.EmitBranch(ContBlock); 2235 CGF.EmitBlock(ContBlock, true); 2236 } 2237 void Exit(CodeGenFunction &CGF) override { 2238 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2239 } 2240 }; 2241 } // anonymous namespace 2242 2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2244 StringRef CriticalName, 2245 const RegionCodeGenTy &CriticalOpGen, 2246 SourceLocation Loc, const Expr *Hint) { 2247 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2248 // CriticalOpGen(); 2249 // __kmpc_end_critical(ident_t *, gtid, Lock); 2250 // Prepare arguments and build a call to __kmpc_critical 2251 if (!CGF.HaveInsertPoint()) 2252 return; 2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2254 getCriticalRegionLock(CriticalName)}; 2255 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2256 std::end(Args)); 2257 if (Hint) { 2258 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2259 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2260 } 2261 CommonActionTy Action( 2262 OMPBuilder.getOrCreateRuntimeFunction( 2263 CGM.getModule(), 2264 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265 EnterArgs,
2266 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267 OMPRTL___kmpc_end_critical),
2268 Args);
2269 CriticalOpGen.setAction(Action);
2270 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274 const RegionCodeGenTy &MasterOpGen,
2275 SourceLocation Loc) {
2276 if (!CGF.HaveInsertPoint())
2277 return;
2278 // if(__kmpc_master(ident_t *, gtid)) {
2279 // MasterOpGen();
2280 // __kmpc_end_master(ident_t *, gtid);
2281 // }
2282 // Prepare arguments and build a call to __kmpc_master
2283 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285 CGM.getModule(), OMPRTL___kmpc_master),
2286 Args,
2287 OMPBuilder.getOrCreateRuntimeFunction(
2288 CGM.getModule(), OMPRTL___kmpc_end_master),
2289 Args,
2290 /*Conditional=*/true);
2291 MasterOpGen.setAction(Action);
2292 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293 Action.Done(CGF);
2294 }
2295
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297 const RegionCodeGenTy &MaskedOpGen,
2298 SourceLocation Loc, const Expr *Filter) {
2299 if (!CGF.HaveInsertPoint())
2300 return;
2301 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302 // MaskedOpGen();
2303 // __kmpc_end_masked(ident_t *, gtid);
2304 // }
2305 // Prepare arguments and build a call to __kmpc_masked
2306 llvm::Value *FilterVal = Filter
2307 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310 FilterVal};
2311 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312 getThreadID(CGF, Loc)};
2313 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314 CGM.getModule(), OMPRTL___kmpc_masked),
2315 Args,
2316 OMPBuilder.getOrCreateRuntimeFunction(
2317 CGM.getModule(), OMPRTL___kmpc_end_masked),
2318 ArgsEnd,
2319 /*Conditional=*/true);
2320 MaskedOpGen.setAction(Action);
2321 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322 Action.Done(CGF);
2323 }
2324
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326 SourceLocation Loc) {
2327 if (!CGF.HaveInsertPoint())
2328 return;
2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330 OMPBuilder.createTaskyield(CGF.Builder);
2331 } else {
2332 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333 llvm::Value *Args[] = {
2334 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338 Args);
2339 }
2340
2341 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342 Region->emitUntiedSwitch(CGF);
2343 }
2344
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346 const RegionCodeGenTy &TaskgroupOpGen,
2347 SourceLocation Loc) {
2348 if (!CGF.HaveInsertPoint())
2349 return;
2350 // __kmpc_taskgroup(ident_t *, gtid);
2351 // TaskgroupOpGen();
2352 // __kmpc_end_taskgroup(ident_t *, gtid);
2353 // Prepare arguments and build a call to __kmpc_taskgroup
2354 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357 Args,
2358
OMPBuilder.getOrCreateRuntimeFunction( 2359 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2360 Args); 2361 TaskgroupOpGen.setAction(Action); 2362 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2363 } 2364 2365 /// Given an array of pointers to variables, project the address of a 2366 /// given variable. 2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2368 unsigned Index, const VarDecl *Var) { 2369 // Pull out the pointer to the variable. 2370 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2371 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2372 2373 Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var)); 2374 Addr = CGF.Builder.CreateElementBitCast( 2375 Addr, CGF.ConvertTypeForMem(Var->getType())); 2376 return Addr; 2377 } 2378 2379 static llvm::Value *emitCopyprivateCopyFunction( 2380 CodeGenModule &CGM, llvm::Type *ArgsType, 2381 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2382 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2383 SourceLocation Loc) { 2384 ASTContext &C = CGM.getContext(); 2385 // void copy_func(void *LHSArg, void *RHSArg); 2386 FunctionArgList Args; 2387 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 Args.push_back(&LHSArg); 2392 Args.push_back(&RHSArg); 2393 const auto &CGFI = 2394 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2395 std::string Name = 2396 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2397 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2398 llvm::GlobalValue::InternalLinkage, Name, 2399 &CGM.getModule()); 2400 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2401 Fn->setDoesNotRecurse(); 2402 CodeGenFunction CGF(CGM); 2403 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2404 // Dest = (void*[n])(LHSArg); 2405 // Src = (void*[n])(RHSArg); 2406 Address LHS = Address::deprecated( 2407 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 2409 CGF.getPointerAlign()); 2410 Address RHS = Address::deprecated( 2411 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2412 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 2413 CGF.getPointerAlign()); 2414 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2415 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2416 // ... 
2417 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2418 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2419 const auto *DestVar = 2420 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2421 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2422 2423 const auto *SrcVar = 2424 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2425 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2426 2427 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2428 QualType Type = VD->getType(); 2429 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2430 } 2431 CGF.FinishFunction(); 2432 return Fn; 2433 } 2434 2435 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2436 const RegionCodeGenTy &SingleOpGen, 2437 SourceLocation Loc, 2438 ArrayRef<const Expr *> CopyprivateVars, 2439 ArrayRef<const Expr *> SrcExprs, 2440 ArrayRef<const Expr *> DstExprs, 2441 ArrayRef<const Expr *> AssignmentOps) { 2442 if (!CGF.HaveInsertPoint()) 2443 return; 2444 assert(CopyprivateVars.size() == SrcExprs.size() && 2445 CopyprivateVars.size() == DstExprs.size() && 2446 CopyprivateVars.size() == AssignmentOps.size()); 2447 ASTContext &C = CGM.getContext(); 2448 // int32 did_it = 0; 2449 // if(__kmpc_single(ident_t *, gtid)) { 2450 // SingleOpGen(); 2451 // __kmpc_end_single(ident_t *, gtid); 2452 // did_it = 1; 2453 // } 2454 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2455 // <copy_func>, did_it); 2456 2457 Address DidIt = Address::invalid(); 2458 if (!CopyprivateVars.empty()) { 2459 // int32 did_it = 0; 2460 QualType KmpInt32Ty = 2461 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2462 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2463 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2464 } 2465 // Prepare arguments and build a call to __kmpc_single 2466 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2467 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2468 CGM.getModule(), OMPRTL___kmpc_single), 2469 Args, 2470 OMPBuilder.getOrCreateRuntimeFunction( 2471 CGM.getModule(), OMPRTL___kmpc_end_single), 2472 Args, 2473 /*Conditional=*/true); 2474 SingleOpGen.setAction(Action); 2475 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2476 if (DidIt.isValid()) { 2477 // did_it = 1; 2478 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2479 } 2480 Action.Done(CGF); 2481 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2482 // <copy_func>, did_it); 2483 if (DidIt.isValid()) { 2484 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2485 QualType CopyprivateArrayTy = C.getConstantArrayType( 2486 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2487 /*IndexTypeQuals=*/0); 2488 // Create a list of all private variables for copyprivate. 2489 Address CopyprivateList = 2490 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2491 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2492 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2493 CGF.Builder.CreateStore( 2494 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2495 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2496 CGF.VoidPtrTy), 2497 Elem); 2498 } 2499 // Build function that copies private values from single region to all other 2500 // threads in the corresponding parallel region. 
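// Illustrative source for orientation (an assumed example, not taken from
// this file):
//   #pragma omp single copyprivate(x)
//   x = compute();
// The thread that executed the single region sets did_it, and the generated
// copy function broadcasts its private x to the other threads via
// __kmpc_copyprivate.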
2501 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2502 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2503 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2504 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2505 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2506 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2507 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2508 llvm::Value *Args[] = { 2509 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2510 getThreadID(CGF, Loc), // i32 <gtid> 2511 BufSize, // size_t <buf_size> 2512 CL.getPointer(), // void *<copyprivate list> 2513 CpyFn, // void (*) (void *, void *) <copy_func> 2514 DidItVal // i32 did_it 2515 }; 2516 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2517 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2518 Args); 2519 } 2520 } 2521 2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2523 const RegionCodeGenTy &OrderedOpGen, 2524 SourceLocation Loc, bool IsThreads) { 2525 if (!CGF.HaveInsertPoint()) 2526 return; 2527 // __kmpc_ordered(ident_t *, gtid); 2528 // OrderedOpGen(); 2529 // __kmpc_end_ordered(ident_t *, gtid); 2530 // Prepare arguments and build a call to __kmpc_ordered 2531 if (IsThreads) { 2532 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2533 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2534 CGM.getModule(), OMPRTL___kmpc_ordered), 2535 Args, 2536 OMPBuilder.getOrCreateRuntimeFunction( 2537 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2538 Args); 2539 OrderedOpGen.setAction(Action); 2540 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2541 return; 2542 } 2543 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2544 } 2545 2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2547 unsigned Flags; 2548 if (Kind == OMPD_for) 2549 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2550 else if (Kind == OMPD_sections) 2551 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2552 else if (Kind == OMPD_single) 2553 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2554 else if (Kind == OMPD_barrier) 2555 Flags = OMP_IDENT_BARRIER_EXPL; 2556 else 2557 Flags = OMP_IDENT_BARRIER_IMPL; 2558 return Flags; 2559 } 2560 2561 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2562 CodeGenFunction &CGF, const OMPLoopDirective &S, 2563 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2564 // Check if the loop directive is actually a doacross loop directive. In this 2565 // case choose static, 1 schedule. 2566 if (llvm::any_of( 2567 S.getClausesOfKind<OMPOrderedClause>(), 2568 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2569 ScheduleKind = OMPC_SCHEDULE_static; 2570 // Chunk size is 1 in this case. 
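// For example, '#pragma omp for ordered(2)' turns the loop nest into a
// doacross loop; schedule(static, 1) then gives a fixed iteration-to-thread
// mapping that the depend(sink)/depend(source) synchronization can rely on
// (illustrative example).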
2571 llvm::APInt ChunkSize(32, 1); 2572 ChunkExpr = IntegerLiteral::Create( 2573 CGF.getContext(), ChunkSize, 2574 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2575 SourceLocation()); 2576 } 2577 } 2578 2579 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2580 OpenMPDirectiveKind Kind, bool EmitChecks, 2581 bool ForceSimpleCall) { 2582 // Check if we should use the OMPBuilder 2583 auto *OMPRegionInfo = 2584 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2585 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2586 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2587 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2588 return; 2589 } 2590 2591 if (!CGF.HaveInsertPoint()) 2592 return; 2593 // Build call __kmpc_cancel_barrier(loc, thread_id); 2594 // Build call __kmpc_barrier(loc, thread_id); 2595 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2596 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2597 // thread_id); 2598 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2599 getThreadID(CGF, Loc)}; 2600 if (OMPRegionInfo) { 2601 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2602 llvm::Value *Result = CGF.EmitRuntimeCall( 2603 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2604 OMPRTL___kmpc_cancel_barrier), 2605 Args); 2606 if (EmitChecks) { 2607 // if (__kmpc_cancel_barrier()) { 2608 // exit from construct; 2609 // } 2610 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2611 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2612 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2613 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2614 CGF.EmitBlock(ExitBB); 2615 // exit from construct; 2616 CodeGenFunction::JumpDest CancelDestination = 2617 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2618 CGF.EmitBranchThroughCleanup(CancelDestination); 2619 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2620 } 2621 return; 2622 } 2623 } 2624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2625 CGM.getModule(), OMPRTL___kmpc_barrier), 2626 Args); 2627 } 2628 2629 /// Map the OpenMP loop schedule to the runtime enumeration. 2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2631 bool Chunked, bool Ordered) { 2632 switch (ScheduleKind) { 2633 case OMPC_SCHEDULE_static: 2634 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2635 : (Ordered ? OMP_ord_static : OMP_sch_static); 2636 case OMPC_SCHEDULE_dynamic: 2637 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2638 case OMPC_SCHEDULE_guided: 2639 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2640 case OMPC_SCHEDULE_runtime: 2641 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2642 case OMPC_SCHEDULE_auto: 2643 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2644 case OMPC_SCHEDULE_unknown: 2645 assert(!Chunked && "chunk was specified but schedule kind not known"); 2646 return Ordered ? OMP_ord_static : OMP_sch_static; 2647 } 2648 llvm_unreachable("Unexpected runtime schedule"); 2649 } 2650 2651 /// Map the OpenMP distribute schedule to the runtime enumeration. 2652 static OpenMPSchedType 2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2654 // only static is allowed for dist_schedule 2655 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659 bool Chunked) const {
2660 OpenMPSchedType Schedule =
2661 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662 return Schedule == OMP_sch_static;
2663 }
2664
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668 return Schedule == OMP_dist_sch_static;
2669 }
2670
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672 bool Chunked) const {
2673 OpenMPSchedType Schedule =
2674 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675 return Schedule == OMP_sch_static_chunked;
2676 }
2677
2678 bool CGOpenMPRuntime::isStaticChunked(
2679 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681 return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685 OpenMPSchedType Schedule =
2686 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688 return Schedule != OMP_sch_static;
2689 }
2690
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692 OpenMPScheduleClauseModifier M1,
2693 OpenMPScheduleClauseModifier M2) {
2694 int Modifier = 0;
2695 switch (M1) {
2696 case OMPC_SCHEDULE_MODIFIER_monotonic:
2697 Modifier = OMP_sch_modifier_monotonic;
2698 break;
2699 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700 Modifier = OMP_sch_modifier_nonmonotonic;
2701 break;
2702 case OMPC_SCHEDULE_MODIFIER_simd:
2703 if (Schedule == OMP_sch_static_chunked)
2704 Schedule = OMP_sch_static_balanced_chunked;
2705 break;
2706 case OMPC_SCHEDULE_MODIFIER_last:
2707 case OMPC_SCHEDULE_MODIFIER_unknown:
2708 break;
2709 }
2710 switch (M2) {
2711 case OMPC_SCHEDULE_MODIFIER_monotonic:
2712 Modifier = OMP_sch_modifier_monotonic;
2713 break;
2714 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715 Modifier = OMP_sch_modifier_nonmonotonic;
2716 break;
2717 case OMPC_SCHEDULE_MODIFIER_simd:
2718 if (Schedule == OMP_sch_static_chunked)
2719 Schedule = OMP_sch_static_balanced_chunked;
2720 break;
2721 case OMPC_SCHEDULE_MODIFIER_last:
2722 case OMPC_SCHEDULE_MODIFIER_unknown:
2723 break;
2724 }
2725 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2726 // If the static schedule kind is specified or if the ordered clause is
2727 // specified, and if the nonmonotonic modifier is not specified, the effect is
2728 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729 // modifier is specified, the effect is as if the nonmonotonic modifier is
2730 // specified.
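// In short, for OpenMP >= 5.0 with no explicit modifier: every static
// schedule (including the ordered and distribute variants) stays monotonic
// by default, and everything else gets OMP_sch_modifier_nonmonotonic; the
// check below encodes exactly that.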
2731 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2732 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2733 Schedule == OMP_sch_static_balanced_chunked || 2734 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2735 Schedule == OMP_dist_sch_static_chunked || 2736 Schedule == OMP_dist_sch_static)) 2737 Modifier = OMP_sch_modifier_nonmonotonic; 2738 } 2739 return Schedule | Modifier; 2740 } 2741 2742 void CGOpenMPRuntime::emitForDispatchInit( 2743 CodeGenFunction &CGF, SourceLocation Loc, 2744 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2745 bool Ordered, const DispatchRTInput &DispatchValues) { 2746 if (!CGF.HaveInsertPoint()) 2747 return; 2748 OpenMPSchedType Schedule = getRuntimeSchedule( 2749 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2750 assert(Ordered || 2751 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2752 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2753 Schedule != OMP_sch_static_balanced_chunked)); 2754 // Call __kmpc_dispatch_init( 2755 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2756 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2757 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2758 2759 // If the Chunk was not specified in the clause - use default value 1. 2760 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2761 : CGF.Builder.getIntN(IVSize, 1); 2762 llvm::Value *Args[] = { 2763 emitUpdateLocation(CGF, Loc), 2764 getThreadID(CGF, Loc), 2765 CGF.Builder.getInt32(addMonoNonMonoModifier( 2766 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2767 DispatchValues.LB, // Lower 2768 DispatchValues.UB, // Upper 2769 CGF.Builder.getIntN(IVSize, 1), // Stride 2770 Chunk // Chunk 2771 }; 2772 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2773 } 2774 2775 static void emitForStaticInitCall( 2776 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2777 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2778 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2779 const CGOpenMPRuntime::StaticRTInput &Values) { 2780 if (!CGF.HaveInsertPoint()) 2781 return; 2782 2783 assert(!Values.Ordered); 2784 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2785 Schedule == OMP_sch_static_balanced_chunked || 2786 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2787 Schedule == OMP_dist_sch_static || 2788 Schedule == OMP_dist_sch_static_chunked); 2789 2790 // Call __kmpc_for_static_init( 2791 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2792 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2793 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2794 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2795 llvm::Value *Chunk = Values.Chunk; 2796 if (Chunk == nullptr) { 2797 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2798 Schedule == OMP_dist_sch_static) && 2799 "expected static non-chunked schedule"); 2800 // If the Chunk was not specified in the clause - use default value 1. 
2801 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2802 } else { 2803 assert((Schedule == OMP_sch_static_chunked || 2804 Schedule == OMP_sch_static_balanced_chunked || 2805 Schedule == OMP_ord_static_chunked || 2806 Schedule == OMP_dist_sch_static_chunked) && 2807 "expected static chunked schedule"); 2808 } 2809 llvm::Value *Args[] = { 2810 UpdateLocation, 2811 ThreadId, 2812 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2813 M2)), // Schedule type 2814 Values.IL.getPointer(), // &isLastIter 2815 Values.LB.getPointer(), // &LB 2816 Values.UB.getPointer(), // &UB 2817 Values.ST.getPointer(), // &Stride 2818 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2819 Chunk // Chunk 2820 }; 2821 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2822 } 2823 2824 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2825 SourceLocation Loc, 2826 OpenMPDirectiveKind DKind, 2827 const OpenMPScheduleTy &ScheduleKind, 2828 const StaticRTInput &Values) { 2829 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2830 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2831 assert(isOpenMPWorksharingDirective(DKind) && 2832 "Expected loop-based or sections-based directive."); 2833 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2834 isOpenMPLoopDirective(DKind) 2835 ? OMP_IDENT_WORK_LOOP 2836 : OMP_IDENT_WORK_SECTIONS); 2837 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2838 llvm::FunctionCallee StaticInitFunction = 2839 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2840 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2841 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2842 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2843 } 2844 2845 void CGOpenMPRuntime::emitDistributeStaticInit( 2846 CodeGenFunction &CGF, SourceLocation Loc, 2847 OpenMPDistScheduleClauseKind SchedKind, 2848 const CGOpenMPRuntime::StaticRTInput &Values) { 2849 OpenMPSchedType ScheduleNum = 2850 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2851 llvm::Value *UpdatedLocation = 2852 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2853 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2854 llvm::FunctionCallee StaticInitFunction; 2855 bool isGPUDistribute = 2856 CGM.getLangOpts().OpenMPIsDevice && 2857 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2858 StaticInitFunction = createForStaticInitFunction( 2859 Values.IVSize, Values.IVSigned, isGPUDistribute); 2860 2861 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2862 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2863 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2864 } 2865 2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2867 SourceLocation Loc, 2868 OpenMPDirectiveKind DKind) { 2869 if (!CGF.HaveInsertPoint()) 2870 return; 2871 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2872 llvm::Value *Args[] = { 2873 emitUpdateLocation(CGF, Loc, 2874 isOpenMPDistributeDirective(DKind) 2875 ? OMP_IDENT_WORK_DISTRIBUTE 2876 : isOpenMPLoopDirective(DKind) 2877 ? 
                                   OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized;
  // it only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
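    // In that case no host IR metadata was loaded, so there is no
    // pre-initialized entry to update; skip the registration silently.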
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
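  //
  // For reference, the shapes produced by the emitters above are:
  //
  //   target region entry:
  //     !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
  //       i32 <line>, i32 <order>}
  //   declare target variable entry:
  //     !{i32 1, !"<mangled-name>", i32 <declare-target-kind>, i32 <order>}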

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;     // Pointer to the offload entry info.
  //                        // (function or global)
  //   char      *name;     // Name of the function or global.
  //   size_t     size;     // Size of the entry info (0 if it is a function).
  //   int32_t    flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved; // Reserved, to be used by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates.t {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t      task_data;
  //   .kmp_privates.t privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
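  // Walk the fields of the privates record and schedule a destructor call for
  // every private copy whose type requires non-trivial destruction.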
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI =
        std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
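/// A rough sketch of what is emitted for each iterator in the expression
/// (names are illustrative; the inline comments in the constructor and
/// destructor below are the authoritative steps):
/// \code
/// counter = 0;
/// cont:
///   if (counter < N) goto body; else goto exit;
/// body:
///   iter = begin + counter * step;
///   <...construct body...>
///   counter = counter + 1;
///   goto cont;
/// exit:
/// \endcode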
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ?
              CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
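  // (The stable sort by decreasing alignment below keeps the generated
  // .kmp_privates.t record naturally aligned and minimizes padding between
  // its fields.)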
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
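  // (The mapping function emitted below hands the task entry typed pointers
  // into the privates block of the task record; when there are no privates a
  // null pointer is passed to the task function instead.)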
4296   llvm::Value *TaskPrivatesMap = nullptr;
4297   llvm::Type *TaskPrivatesMapTy =
4298       std::next(TaskFunction->arg_begin(), 3)->getType();
4299   if (!Privates.empty()) {
4300     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4301     TaskPrivatesMap =
4302         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4303     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4304         TaskPrivatesMap, TaskPrivatesMapTy);
4305   } else {
4306     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4307         cast<llvm::PointerType>(TaskPrivatesMapTy));
4308   }
4309   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4310   // kmp_task_t *tt);
4311   llvm::Function *TaskEntry = emitProxyTaskFunction(
4312       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4313       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4314       TaskPrivatesMap);
4315 
4316   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4317   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4318   // kmp_routine_entry_t *task_entry);
4319   // Task flags. Format is taken from
4320   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4321   // description of kmp_tasking_flags struct.
4322   enum {
4323     TiedFlag = 0x1,
4324     FinalFlag = 0x2,
4325     DestructorsFlag = 0x8,
4326     PriorityFlag = 0x20,
4327     DetachableFlag = 0x40,
4328   };
4329   unsigned Flags = Data.Tied ? TiedFlag : 0;
4330   bool NeedsCleanup = false;
4331   if (!Privates.empty()) {
4332     NeedsCleanup =
4333         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4334     if (NeedsCleanup)
4335       Flags = Flags | DestructorsFlag;
4336   }
4337   if (Data.Priority.getInt())
4338     Flags = Flags | PriorityFlag;
4339   if (D.hasClausesOfKind<OMPDetachClause>())
4340     Flags = Flags | DetachableFlag;
4341   llvm::Value *TaskFlags =
4342       Data.Final.getPointer()
4343           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4344                                      CGF.Builder.getInt32(FinalFlag),
4345                                      CGF.Builder.getInt32(/*C=*/0))
4346           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4347   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4348   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4349   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4350       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4351       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4352           TaskEntry, KmpRoutineEntryPtrTy)};
4353   llvm::Value *NewTask;
4354   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4355     // Check if we have any device clause associated with the directive.
4356     const Expr *Device = nullptr;
4357     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4358       Device = C->getDevice();
4359     // Emit device ID if any, otherwise use the default value.
4360     llvm::Value *DeviceID;
4361     if (Device)
4362       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4363                                            CGF.Int64Ty, /*isSigned=*/true);
4364     else
4365       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4366     AllocArgs.push_back(DeviceID);
4367     NewTask = CGF.EmitRuntimeCall(
4368         OMPBuilder.getOrCreateRuntimeFunction(
4369             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4370         AllocArgs);
4371   } else {
4372     NewTask =
4373         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4374                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4375                             AllocArgs);
4376   }
4377   // Emit detach clause initialization.
4378 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4379 // task_descriptor); 4380 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4381 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4382 LValue EvtLVal = CGF.EmitLValue(Evt); 4383 4384 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4385 // int gtid, kmp_task_t *task); 4386 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4387 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4388 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4389 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4390 OMPBuilder.getOrCreateRuntimeFunction( 4391 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4392 {Loc, Tid, NewTask}); 4393 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4394 Evt->getExprLoc()); 4395 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4396 } 4397 // Process affinity clauses. 4398 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4399 // Process list of affinity data. 4400 ASTContext &C = CGM.getContext(); 4401 Address AffinitiesArray = Address::invalid(); 4402 // Calculate number of elements to form the array of affinity data. 4403 llvm::Value *NumOfElements = nullptr; 4404 unsigned NumAffinities = 0; 4405 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4406 if (const Expr *Modifier = C->getModifier()) { 4407 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4408 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4409 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4410 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4411 NumOfElements = 4412 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4413 } 4414 } else { 4415 NumAffinities += C->varlist_size(); 4416 } 4417 } 4418 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4419 // Fields ids in kmp_task_affinity_info record. 4420 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4421 4422 QualType KmpTaskAffinityInfoArrayTy; 4423 if (NumOfElements) { 4424 NumOfElements = CGF.Builder.CreateNUWAdd( 4425 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4426 auto *OVE = new (C) OpaqueValueExpr( 4427 Loc, 4428 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4429 VK_PRValue); 4430 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4431 RValue::get(NumOfElements)); 4432 KmpTaskAffinityInfoArrayTy = 4433 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4434 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4435 // Properly emit variable-sized array. 
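      // (Sketch: with N = NumOfElements computed above, this is morally
      //      kmp_task_affinity_info_t .affs.arr[N];
      //  on the stack — the OpaqueValueExpr carries the runtime bound into
      //  the VariableArrayType so ordinary VLA emission can be reused.)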
4436 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4437 ImplicitParamDecl::Other); 4438 CGF.EmitVarDecl(*PD); 4439 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4440 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4441 /*isSigned=*/false); 4442 } else { 4443 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4444 KmpTaskAffinityInfoTy, 4445 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4446 ArrayType::Normal, /*IndexTypeQuals=*/0); 4447 AffinitiesArray = 4448 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4449 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4450 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4451 /*isSigned=*/false); 4452 } 4453 4454 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4455 // Fill array by elements without iterators. 4456 unsigned Pos = 0; 4457 bool HasIterator = false; 4458 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4459 if (C->getModifier()) { 4460 HasIterator = true; 4461 continue; 4462 } 4463 for (const Expr *E : C->varlists()) { 4464 llvm::Value *Addr; 4465 llvm::Value *Size; 4466 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4467 LValue Base = 4468 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4469 KmpTaskAffinityInfoTy); 4470 // affs[i].base_addr = &<Affinities[i].second>; 4471 LValue BaseAddrLVal = CGF.EmitLValueForField( 4472 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4473 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4474 BaseAddrLVal); 4475 // affs[i].len = sizeof(<Affinities[i].second>); 4476 LValue LenLVal = CGF.EmitLValueForField( 4477 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4478 CGF.EmitStoreOfScalar(Size, LenLVal); 4479 ++Pos; 4480 } 4481 } 4482 LValue PosLVal; 4483 if (HasIterator) { 4484 PosLVal = CGF.MakeAddrLValue( 4485 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4486 C.getSizeType()); 4487 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4488 } 4489 // Process elements with iterators. 
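    // (Roughly, for 'affinity(iterator(i=0:n): a[i])' the loop below expands
    //  to
    //      for (i = 0; i < n; ++i) {
    //        affs[pos].base_addr = &a[i]; affs[pos].len = sizeof(a[i]); ++pos;
    //      }
    //  with pos kept in affs.counter.addr because the trip count is dynamic.)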
4490 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4491 const Expr *Modifier = C->getModifier(); 4492 if (!Modifier) 4493 continue; 4494 OMPIteratorGeneratorScope IteratorScope( 4495 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4496 for (const Expr *E : C->varlists()) { 4497 llvm::Value *Addr; 4498 llvm::Value *Size; 4499 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4500 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4501 LValue Base = CGF.MakeAddrLValue( 4502 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4503 // affs[i].base_addr = &<Affinities[i].second>; 4504 LValue BaseAddrLVal = CGF.EmitLValueForField( 4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4506 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4507 BaseAddrLVal); 4508 // affs[i].len = sizeof(<Affinities[i].second>); 4509 LValue LenLVal = CGF.EmitLValueForField( 4510 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4511 CGF.EmitStoreOfScalar(Size, LenLVal); 4512 Idx = CGF.Builder.CreateNUWAdd( 4513 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4514 CGF.EmitStoreOfScalar(Idx, PosLVal); 4515 } 4516 } 4517 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4518 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4519 // naffins, kmp_task_affinity_info_t *affin_list); 4520 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4521 llvm::Value *GTid = getThreadID(CGF, Loc); 4522 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4523 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4524 // FIXME: Emit the function and ignore its result for now unless the 4525 // runtime function is properly implemented. 4526 (void)CGF.EmitRuntimeCall( 4527 OMPBuilder.getOrCreateRuntimeFunction( 4528 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4529 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4530 } 4531 llvm::Value *NewTaskNewTaskTTy = 4532 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4533 NewTask, KmpTaskTWithPrivatesPtrTy); 4534 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4535 KmpTaskTWithPrivatesQTy); 4536 LValue TDBase = 4537 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4538 // Fill the data in the resulting kmp_task_t record. 4539 // Copy shareds if there are any. 4540 Address KmpTaskSharedsPtr = Address::invalid(); 4541 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4542 KmpTaskSharedsPtr = Address::deprecated( 4543 CGF.EmitLoadOfScalar( 4544 CGF.EmitLValueForField( 4545 TDBase, 4546 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 4547 Loc), 4548 CGM.getNaturalTypeAlignment(SharedsTy)); 4549 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4550 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4551 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4552 } 4553 // Emit initial values for private copies (if any). 
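  // (For taskloops with lastprivates or nontrivial firstprivate init, a
  //  task_dup callback is also emitted below so the runtime can replicate
  //  and re-initialize the privates block for every task it creates from
  //  the loop.)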
4554 TaskResultTy Result; 4555 if (!Privates.empty()) { 4556 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4557 SharedsTy, SharedsPtrTy, Data, Privates, 4558 /*ForDup=*/false); 4559 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4560 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4561 Result.TaskDupFn = emitTaskDupFunction( 4562 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4563 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4564 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4565 } 4566 } 4567 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4568 enum { Priority = 0, Destructors = 1 }; 4569 // Provide pointer to function with destructors for privates. 4570 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4571 const RecordDecl *KmpCmplrdataUD = 4572 (*FI)->getType()->getAsUnionType()->getDecl(); 4573 if (NeedsCleanup) { 4574 llvm::Value *DestructorFn = emitDestructorsFunction( 4575 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4576 KmpTaskTWithPrivatesQTy); 4577 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4578 LValue DestructorsLV = CGF.EmitLValueForField( 4579 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4580 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4581 DestructorFn, KmpRoutineEntryPtrTy), 4582 DestructorsLV); 4583 } 4584 // Set priority. 4585 if (Data.Priority.getInt()) { 4586 LValue Data2LV = CGF.EmitLValueForField( 4587 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4588 LValue PriorityLV = CGF.EmitLValueForField( 4589 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4590 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4591 } 4592 Result.NewTask = NewTask; 4593 Result.TaskEntry = TaskEntry; 4594 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4595 Result.TDBase = TDBase; 4596 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4597 return Result; 4598 } 4599 4600 namespace { 4601 /// Dependence kind for RTL. 4602 enum RTLDependenceKindTy { 4603 DepIn = 0x01, 4604 DepInOut = 0x3, 4605 DepMutexInOutSet = 0x4, 4606 DepInOutSet = 0x8 4607 }; 4608 /// Fields ids in kmp_depend_info record. 4609 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4610 } // namespace 4611 4612 /// Translates internal dependency kind into the runtime kind. 4613 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4614 RTLDependenceKindTy DepKind; 4615 switch (K) { 4616 case OMPC_DEPEND_in: 4617 DepKind = DepIn; 4618 break; 4619 // Out and InOut dependencies must use the same code. 4620 case OMPC_DEPEND_out: 4621 case OMPC_DEPEND_inout: 4622 DepKind = DepInOut; 4623 break; 4624 case OMPC_DEPEND_mutexinoutset: 4625 DepKind = DepMutexInOutSet; 4626 break; 4627 case OMPC_DEPEND_inoutset: 4628 DepKind = DepInOutSet; 4629 break; 4630 case OMPC_DEPEND_source: 4631 case OMPC_DEPEND_sink: 4632 case OMPC_DEPEND_depobj: 4633 case OMPC_DEPEND_unknown: 4634 llvm_unreachable("Unknown task dependence type"); 4635 } 4636 return DepKind; 4637 } 4638 4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
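/// The implicit record mirrors the runtime's kmp_depend_info (see kmp.h),
/// roughly { intptr_t base_addr; size_t len; <uint> flags; }, where the
/// flags integer is an unsigned type sized to match 'bool' on the target.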
4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4641 QualType &FlagsTy) { 4642 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4643 if (KmpDependInfoTy.isNull()) { 4644 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4645 KmpDependInfoRD->startDefinition(); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4648 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4649 KmpDependInfoRD->completeDefinition(); 4650 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4651 } 4652 } 4653 4654 std::pair<llvm::Value *, LValue> 4655 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4656 SourceLocation Loc) { 4657 ASTContext &C = CGM.getContext(); 4658 QualType FlagsTy; 4659 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4660 RecordDecl *KmpDependInfoRD = 4661 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4662 LValue Base = CGF.EmitLoadOfPointerLValue( 4663 DepobjLVal.getAddress(CGF), 4664 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4665 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4666 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4667 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4668 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4669 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4670 Base.getTBAAInfo()); 4671 Address DepObjAddr = CGF.Builder.CreateGEP( 4672 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4673 LValue NumDepsBase = CGF.MakeAddrLValue( 4674 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4675 // NumDeps = deps[i].base_addr; 4676 LValue BaseAddrLVal = CGF.EmitLValueForField( 4677 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4678 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4679 return std::make_pair(NumDeps, Base); 4680 } 4681 4682 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4683 llvm::PointerUnion<unsigned *, LValue *> Pos, 4684 const OMPTaskDataTy::DependData &Data, 4685 Address DependenciesArray) { 4686 CodeGenModule &CGM = CGF.CGM; 4687 ASTContext &C = CGM.getContext(); 4688 QualType FlagsTy; 4689 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4690 RecordDecl *KmpDependInfoRD = 4691 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4692 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4693 4694 OMPIteratorGeneratorScope IteratorScope( 4695 CGF, cast_or_null<OMPIteratorExpr>( 4696 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4697                             : nullptr));
4698   for (const Expr *E : Data.DepExprs) {
4699     llvm::Value *Addr;
4700     llvm::Value *Size;
4701     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4702     LValue Base;
4703     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4704       Base = CGF.MakeAddrLValue(
4705           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4706     } else {
4707       LValue &PosLVal = *Pos.get<LValue *>();
4708       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4709       Base = CGF.MakeAddrLValue(
4710           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4711     }
4712     // deps[i].base_addr = &<Dependencies[i].second>;
4713     LValue BaseAddrLVal = CGF.EmitLValueForField(
4714         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4715     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4716                           BaseAddrLVal);
4717     // deps[i].len = sizeof(<Dependencies[i].second>);
4718     LValue LenLVal = CGF.EmitLValueForField(
4719         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4720     CGF.EmitStoreOfScalar(Size, LenLVal);
4721     // deps[i].flags = <Dependencies[i].first>;
4722     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4723     LValue FlagsLVal = CGF.EmitLValueForField(
4724         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4725     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4726                           FlagsLVal);
4727     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4728       ++(*P);
4729     } else {
4730       LValue &PosLVal = *Pos.get<LValue *>();
4731       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4732       Idx = CGF.Builder.CreateNUWAdd(Idx,
4733                                      llvm::ConstantInt::get(Idx->getType(), 1));
4734       CGF.EmitStoreOfScalar(Idx, PosLVal);
4735     }
4736   }
4737 }
4738 
4739 static SmallVector<llvm::Value *, 4>
4740 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4741                         const OMPTaskDataTy::DependData &Data) {
4742   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4743          "Expected depobj dependency kind.");
4744   SmallVector<llvm::Value *, 4> Sizes;
4745   SmallVector<LValue, 4> SizeLVals;
4746   ASTContext &C = CGF.getContext();
4747   QualType FlagsTy;
4748   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749   RecordDecl *KmpDependInfoRD =
4750       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4751   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4752   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4753   {
4754     OMPIteratorGeneratorScope IteratorScope(
4755         CGF, cast_or_null<OMPIteratorExpr>(
4756                  Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
4757                                    : nullptr));
4758     for (const Expr *E : Data.DepExprs) {
4759       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4760       LValue Base = CGF.EmitLoadOfPointerLValue(
4761           DepobjLVal.getAddress(CGF),
4762           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4763       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4764           Base.getAddress(CGF), KmpDependInfoPtrT,
4765           CGF.ConvertTypeForMem(KmpDependInfoTy));
4766       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4767                                 Base.getTBAAInfo());
4768       Address DepObjAddr = CGF.Builder.CreateGEP(
4769           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4770       LValue NumDepsBase = CGF.MakeAddrLValue(
4771           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4772       // NumDeps = deps[i].base_addr;
4773       LValue BaseAddrLVal = CGF.EmitLValueForField(
4774           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4775       llvm::Value *NumDeps =
4776           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4777       LValue NumLVal = CGF.MakeAddrLValue(
4778           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4779           C.getUIntPtrType());
4780       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4781                               NumLVal.getAddress(CGF));
4782       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4783       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4784       CGF.EmitStoreOfScalar(Add, NumLVal);
4785       SizeLVals.push_back(NumLVal);
4786     }
4787   }
4788   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4789     llvm::Value *Size =
4790         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4791     Sizes.push_back(Size);
4792   }
4793   return Sizes;
4794 }
4795 
4796 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4797                                LValue PosLVal,
4798                                const OMPTaskDataTy::DependData &Data,
4799                                Address DependenciesArray) {
4800   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4801          "Expected depobj dependency kind.");
4802   ASTContext &C = CGF.getContext();
4803   QualType FlagsTy;
4804   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4805   RecordDecl *KmpDependInfoRD =
4806       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4807   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4808   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4809   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4810   {
4811     OMPIteratorGeneratorScope IteratorScope(
4812         CGF, cast_or_null<OMPIteratorExpr>(
4813                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4814                                    : nullptr));
4815     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4816       const Expr *E = Data.DepExprs[I];
4817       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4818       LValue Base = CGF.EmitLoadOfPointerLValue(
4819           DepobjLVal.getAddress(CGF),
4820           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4821       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4822           Base.getAddress(CGF), KmpDependInfoPtrT,
4823           CGF.ConvertTypeForMem(KmpDependInfoTy));
4824       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4825                                 Base.getTBAAInfo());
4826 
4827       // Get number of elements in a single depobj.
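      // (Layout reminder: a depobj array is deps[-1 .. n-1]; element -1 is a
      //  header whose base_addr field stores n, and the user-visible pointer
      //  points at element 0, so stepping back one element recovers the
      //  count — the same convention emitDepobjDependClause() writes.)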
4828       Address DepObjAddr = CGF.Builder.CreateGEP(
4829           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4830       LValue NumDepsBase = CGF.MakeAddrLValue(
4831           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4832       // NumDeps = deps[i].base_addr;
4833       LValue BaseAddrLVal = CGF.EmitLValueForField(
4834           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4835       llvm::Value *NumDeps =
4836           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4837 
4838       // Memcpy dependency data.
4839       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4840           ElSize,
4841           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4842       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4843       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4844       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4845 
4846       // Increase pos.
4847       // pos += numDeps;
4848       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4849       CGF.EmitStoreOfScalar(Add, PosLVal);
4850     }
4851   }
4852 }
4853 
4854 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4855     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4856     SourceLocation Loc) {
4857   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4858         return D.DepExprs.empty();
4859       }))
4860     return std::make_pair(nullptr, Address::invalid());
4861   // Process list of dependencies.
4862   ASTContext &C = CGM.getContext();
4863   Address DependenciesArray = Address::invalid();
4864   llvm::Value *NumOfElements = nullptr;
4865   unsigned NumDependencies = std::accumulate(
4866       Dependencies.begin(), Dependencies.end(), 0,
4867       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4868         return D.DepKind == OMPC_DEPEND_depobj
4869                    ? V
4870                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4871       });
4872   QualType FlagsTy;
4873   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4874   bool HasDepobjDeps = false;
4875   bool HasRegularWithIterators = false;
4876   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4877   llvm::Value *NumOfRegularWithIterators =
4878       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4879   // Calculate number of depobj dependencies and regular deps with iterators.
4880   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4881     if (D.DepKind == OMPC_DEPEND_depobj) {
4882       SmallVector<llvm::Value *, 4> Sizes =
4883           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4884       for (llvm::Value *Size : Sizes) {
4885         NumOfDepobjElements =
4886             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4887       }
4888       HasDepobjDeps = true;
4889       continue;
4890     }
4891     // Include number of iterations, if any.
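    // (Sketch: 'depend(iterator(i=0:n), in: x[i])' contributes
    //  n * <number of list items> entries, so the total is a runtime value
    //  and forces the VLA path below rather than a constant-sized array.)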
4892 
4893     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4894       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4895         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4896         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4897         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4898             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4899         NumOfRegularWithIterators =
4900             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4901       }
4902       HasRegularWithIterators = true;
4903       continue;
4904     }
4905   }
4906 
4907   QualType KmpDependInfoArrayTy;
4908   if (HasDepobjDeps || HasRegularWithIterators) {
4909     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4910                                            /*isSigned=*/false);
4911     if (HasDepobjDeps) {
4912       NumOfElements =
4913           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4914     }
4915     if (HasRegularWithIterators) {
4916       NumOfElements =
4917           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4918     }
4919     auto *OVE = new (C) OpaqueValueExpr(
4920         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4921         VK_PRValue);
4922     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4923                                                   RValue::get(NumOfElements));
4924     KmpDependInfoArrayTy =
4925         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4926                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4927     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4928     // Properly emit variable-sized array.
4929     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4930                                          ImplicitParamDecl::Other);
4931     CGF.EmitVarDecl(*PD);
4932     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4933     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4934                                               /*isSigned=*/false);
4935   } else {
4936     KmpDependInfoArrayTy = C.getConstantArrayType(
4937         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4938         ArrayType::Normal, /*IndexTypeQuals=*/0);
4939     DependenciesArray =
4940         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4941     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4942     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4943                                            /*isSigned=*/false);
4944   }
4945   unsigned Pos = 0;
4946   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4947     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4948         Dependencies[I].IteratorExpr)
4949       continue;
4950     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4951                    DependenciesArray);
4952   }
4953   // Copy regular dependencies with iterators.
4954   LValue PosLVal = CGF.MakeAddrLValue(
4955       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4956   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4957   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4958     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4959         !Dependencies[I].IteratorExpr)
4960       continue;
4961     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4962                    DependenciesArray);
4963   }
4964   // Copy final depobj arrays without iterators.
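  // (Third and final fill pass. The finished array is laid out as: plain
  //  dependences first, then iterator-expanded ones, then the contents of
  //  any depobj arrays memcpy'd in — NumOfElements accounts for all three.)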
4965 if (HasDepobjDeps) { 4966 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4967 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4968 continue; 4969 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4970 DependenciesArray); 4971 } 4972 } 4973 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4974 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4975 return std::make_pair(NumOfElements, DependenciesArray); 4976 } 4977 4978 Address CGOpenMPRuntime::emitDepobjDependClause( 4979 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4980 SourceLocation Loc) { 4981 if (Dependencies.DepExprs.empty()) 4982 return Address::invalid(); 4983 // Process list of dependencies. 4984 ASTContext &C = CGM.getContext(); 4985 Address DependenciesArray = Address::invalid(); 4986 unsigned NumDependencies = Dependencies.DepExprs.size(); 4987 QualType FlagsTy; 4988 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4989 RecordDecl *KmpDependInfoRD = 4990 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4991 4992 llvm::Value *Size; 4993 // Define type kmp_depend_info[<Dependencies.size()>]; 4994 // For depobj reserve one extra element to store the number of elements. 4995 // It is required to handle depobj(x) update(in) construct. 4996 // kmp_depend_info[<Dependencies.size()>] deps; 4997 llvm::Value *NumDepsVal; 4998 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4999 if (const auto *IE = 5000 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5001 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5002 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5003 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5004 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5005 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5006 } 5007 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5008 NumDepsVal); 5009 CharUnits SizeInBytes = 5010 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5011 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5012 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5013 NumDepsVal = 5014 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5015 } else { 5016 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5017 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5018 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5019 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5020 Size = CGM.getSize(Sz.alignTo(Align)); 5021 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5022 } 5023 // Need to allocate on the dynamic memory. 5024 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5025 // Use default allocator. 5026 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5027 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5028 5029 llvm::Value *Addr = 5030 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5031 CGM.getModule(), OMPRTL___kmpc_alloc), 5032 Args, ".dep.arr.addr"); 5033 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5034 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5035 DependenciesArray = Address::deprecated(Addr, Align); 5036 // Write number of elements in the first element of array for depobj. 
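  // (Sketch of the allocation above, for k dependences:
  //      deps[0].base_addr = k;    // hidden header element
  //      deps[1..k]                // the actual entries
  //  The pointer returned below is advanced past the header, matching what
  //  getDepobjElements() expects.)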
5037   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5038   // deps[i].base_addr = NumDependencies;
5039   LValue BaseAddrLVal = CGF.EmitLValueForField(
5040       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5041   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5042   llvm::PointerUnion<unsigned *, LValue *> Pos;
5043   unsigned Idx = 1;
5044   LValue PosLVal;
5045   if (Dependencies.IteratorExpr) {
5046     PosLVal = CGF.MakeAddrLValue(
5047         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5048         C.getSizeType());
5049     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5050                           /*IsInit=*/true);
5051     Pos = &PosLVal;
5052   } else {
5053     Pos = &Idx;
5054   }
5055   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5056   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5057       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
5058       CGF.Int8Ty);
5059   return DependenciesArray;
5060 }
5061 
5062 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5063                                         SourceLocation Loc) {
5064   ASTContext &C = CGM.getContext();
5065   QualType FlagsTy;
5066   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5067   LValue Base = CGF.EmitLoadOfPointerLValue(
5068       DepobjLVal.getAddress(CGF),
5069       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5070   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5071   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5072       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
5073       CGF.ConvertTypeForMem(KmpDependInfoTy));
5074   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5075       Addr.getElementType(), Addr.getPointer(),
5076       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5077   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5078                                                                CGF.VoidPtrTy);
5079   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5080   // Use default allocator.
5081   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5082   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5083 
5084   // __kmpc_free(gtid, addr, nullptr);
5085   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5086                                 CGM.getModule(), OMPRTL___kmpc_free),
5087                             Args);
5088 }
5089 
5090 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5091                                        OpenMPDependClauseKind NewDepKind,
5092                                        SourceLocation Loc) {
5093   ASTContext &C = CGM.getContext();
5094   QualType FlagsTy;
5095   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5096   RecordDecl *KmpDependInfoRD =
5097       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5098   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5099   llvm::Value *NumDeps;
5100   LValue Base;
5101   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5102 
5103   Address Begin = Base.getAddress(CGF);
5104   // Cast from pointer to array type to pointer to single element.
5105   llvm::Value *End = CGF.Builder.CreateGEP(
5106       Begin.getElementType(), Begin.getPointer(), NumDeps);
5107   // The basic structure here is a do-while loop; a depobj holds at least one element.
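  // (In C terms the generated IR is roughly:
  //      kmp_depend_info *el = begin;
  //      do { el->flags = NewDepKind; ++el; } while (el != end);
  //  no emptiness pre-check is needed.)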
5108 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5109 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5110 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5111 CGF.EmitBlock(BodyBB); 5112 llvm::PHINode *ElementPHI = 5113 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5114 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5115 Begin = Begin.withPointer(ElementPHI); 5116 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5117 Base.getTBAAInfo()); 5118 // deps[i].flags = NewDepKind; 5119 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5120 LValue FlagsLVal = CGF.EmitLValueForField( 5121 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5122 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5123 FlagsLVal); 5124 5125 // Shift the address forward by one element. 5126 Address ElementNext = 5127 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5128 ElementPHI->addIncoming(ElementNext.getPointer(), 5129 CGF.Builder.GetInsertBlock()); 5130 llvm::Value *IsEmpty = 5131 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5132 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5133 // Done. 5134 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5135 } 5136 5137 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5138 const OMPExecutableDirective &D, 5139 llvm::Function *TaskFunction, 5140 QualType SharedsTy, Address Shareds, 5141 const Expr *IfCond, 5142 const OMPTaskDataTy &Data) { 5143 if (!CGF.HaveInsertPoint()) 5144 return; 5145 5146 TaskResultTy Result = 5147 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5148 llvm::Value *NewTask = Result.NewTask; 5149 llvm::Function *TaskEntry = Result.TaskEntry; 5150 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5151 LValue TDBase = Result.TDBase; 5152 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5153 // Process list of dependences. 5154 Address DependenciesArray = Address::invalid(); 5155 llvm::Value *NumOfElements; 5156 std::tie(NumOfElements, DependenciesArray) = 5157 emitDependClause(CGF, Data.Dependences, Loc); 5158 5159 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5160 // libcall. 
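  // (Overall shape being emitted for '#pragma omp task if(cond)', sketched:
  //      if (cond) {
  //        __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
  //      } else {
  //        [__kmpc_omp_wait_deps(...) if there are dependences;]
  //        __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //        proxy_task_entry(gtid, new_task);
  //        __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //      }
  //  i.e. the 'else' branch runs the task body immediately on the
  //  encountering thread, as required for an undeferred task.)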
5161   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5162   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5163   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
5164   // dependence list is not empty.
5165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5166   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5167   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5168   llvm::Value *DepTaskArgs[7];
5169   if (!Data.Dependences.empty()) {
5170     DepTaskArgs[0] = UpLoc;
5171     DepTaskArgs[1] = ThreadID;
5172     DepTaskArgs[2] = NewTask;
5173     DepTaskArgs[3] = NumOfElements;
5174     DepTaskArgs[4] = DependenciesArray.getPointer();
5175     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5176     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5177   }
5178   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5179                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5180     if (!Data.Tied) {
5181       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5182       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5183       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5184     }
5185     if (!Data.Dependences.empty()) {
5186       CGF.EmitRuntimeCall(
5187           OMPBuilder.getOrCreateRuntimeFunction(
5188               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5189           DepTaskArgs);
5190     } else {
5191       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5192                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5193                           TaskArgs);
5194     }
5195     // Check if parent region is untied and build return for untied task.
5196     if (auto *Region =
5197             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5198       Region->emitUntiedSwitch(CGF);
5199   };
5200 
5201   llvm::Value *DepWaitTaskArgs[6];
5202   if (!Data.Dependences.empty()) {
5203     DepWaitTaskArgs[0] = UpLoc;
5204     DepWaitTaskArgs[1] = ThreadID;
5205     DepWaitTaskArgs[2] = NumOfElements;
5206     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5207     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5208     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5209   }
5210   auto &M = CGM.getModule();
5211   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5212                         TaskEntry, &Data, &DepWaitTaskArgs,
5213                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5214     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5215     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5216     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5217     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5218     // is specified.
5219 if (!Data.Dependences.empty()) 5220 CGF.EmitRuntimeCall( 5221 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5222 DepWaitTaskArgs); 5223 // Call proxy_task_entry(gtid, new_task); 5224 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5225 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5226 Action.Enter(CGF); 5227 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5228 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5229 OutlinedFnArgs); 5230 }; 5231 5232 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5233 // kmp_task_t *new_task); 5234 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5235 // kmp_task_t *new_task); 5236 RegionCodeGenTy RCG(CodeGen); 5237 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5238 M, OMPRTL___kmpc_omp_task_begin_if0), 5239 TaskArgs, 5240 OMPBuilder.getOrCreateRuntimeFunction( 5241 M, OMPRTL___kmpc_omp_task_complete_if0), 5242 TaskArgs); 5243 RCG.setAction(Action); 5244 RCG(CGF); 5245 }; 5246 5247 if (IfCond) { 5248 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5249 } else { 5250 RegionCodeGenTy ThenRCG(ThenCodeGen); 5251 ThenRCG(CGF); 5252 } 5253 } 5254 5255 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5256 const OMPLoopDirective &D, 5257 llvm::Function *TaskFunction, 5258 QualType SharedsTy, Address Shareds, 5259 const Expr *IfCond, 5260 const OMPTaskDataTy &Data) { 5261 if (!CGF.HaveInsertPoint()) 5262 return; 5263 TaskResultTy Result = 5264 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5265 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5266 // libcall. 5267 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5268 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5269 // sched, kmp_uint64 grainsize, void *task_dup); 5270 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5271 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5272 llvm::Value *IfVal; 5273 if (IfCond) { 5274 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5275 /*isSigned=*/true); 5276 } else { 5277 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5278 } 5279 5280 LValue LBLVal = CGF.EmitLValueForField( 5281 Result.TDBase, 5282 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5283 const auto *LBVar = 5284 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5285 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5286 LBLVal.getQuals(), 5287 /*IsInitializer=*/true); 5288 LValue UBLVal = CGF.EmitLValueForField( 5289 Result.TDBase, 5290 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5291 const auto *UBVar = 5292 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5293 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5294 UBLVal.getQuals(), 5295 /*IsInitializer=*/true); 5296 LValue StLVal = CGF.EmitLValueForField( 5297 Result.TDBase, 5298 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5299 const auto *StVar = 5300 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5301 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5302 StLVal.getQuals(), 5303 /*IsInitializer=*/true); 5304 // Store reductions address. 
5305   LValue RedLVal = CGF.EmitLValueForField(
5306       Result.TDBase,
5307       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5308   if (Data.Reductions) {
5309     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5310   } else {
5311     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5312                                CGF.getContext().VoidPtrTy);
5313   }
5314   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5315   llvm::Value *TaskArgs[] = {
5316       UpLoc,
5317       ThreadID,
5318       Result.NewTask,
5319       IfVal,
5320       LBLVal.getPointer(CGF),
5321       UBLVal.getPointer(CGF),
5322       CGF.EmitLoadOfScalar(StLVal, Loc),
5323       llvm::ConstantInt::getSigned(
5324           CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler.
5325       llvm::ConstantInt::getSigned(
5326           CGF.IntTy, Data.Schedule.getPointer()
5327                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5328                          : NoSchedule),
5329       Data.Schedule.getPointer()
5330           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5331                                       /*isSigned=*/false)
5332           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5333       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5334                              Result.TaskDupFn, CGF.VoidPtrTy)
5335                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5336   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5337                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5338                       TaskArgs);
5339 }
5340 
5341 /// Emit the reduction operation for each element of the array (required for
5342 /// array sections): LHS op= RHS.
5343 /// \param Type Type of array.
5344 /// \param LHSVar Variable on the left side of the reduction operation
5345 /// (references element of array in original variable).
5346 /// \param RHSVar Variable on the right side of the reduction operation
5347 /// (references element of array in original variable).
5348 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5349 /// RHSVar.
5350 static void EmitOMPAggregateReduction(
5351     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5352     const VarDecl *RHSVar,
5353     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5354                                   const Expr *, const Expr *)> &RedOpGen,
5355     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5356     const Expr *UpExpr = nullptr) {
5357   // Perform element-by-element reduction.
5358   QualType ElementTy;
5359   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5360   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5361 
5362   // Drill down to the base element type on both arrays.
5363   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5364   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5365 
5366   llvm::Value *RHSBegin = RHSAddr.getPointer();
5367   llvm::Value *LHSBegin = LHSAddr.getPointer();
5368   // Cast from pointer to array type to pointer to single element.
5369   llvm::Value *LHSEnd =
5370       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5371   // The basic structure here is a while-do loop.
5372   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5373   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5374   llvm::Value *IsEmpty =
5375       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5376   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5377 
5378   // Enter the loop body, making that address the current address.
5379 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5380 CGF.EmitBlock(BodyBB); 5381 5382 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5383 5384 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5385 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5386 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5387 Address RHSElementCurrent = Address::deprecated( 5388 RHSElementPHI, 5389 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5390 5391 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5392 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5393 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5394 Address LHSElementCurrent = Address::deprecated( 5395 LHSElementPHI, 5396 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5397 5398 // Emit copy. 5399 CodeGenFunction::OMPPrivateScope Scope(CGF); 5400 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5401 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5402 Scope.Privatize(); 5403 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5404 Scope.ForceCleanup(); 5405 5406 // Shift the address forward by one element. 5407 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5408 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5409 "omp.arraycpy.dest.element"); 5410 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5411 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5412 "omp.arraycpy.src.element"); 5413 // Check whether we've reached the end. 5414 llvm::Value *Done = 5415 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5416 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5417 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5418 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5419 5420 // Done. 5421 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5422 } 5423 5424 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5425 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5426 /// UDR combiner function. 
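/// (Sketch: for 'declare reduction(merge : T : omp_out = f(omp_out, omp_in))'
/// the ReductionOp is a CallExpr through an OpaqueValueExpr callee; we rebind
/// that opaque value to the previously emitted combiner function and emit the
/// call, so the UDR body runs against the LHS/RHS elements.)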
5427 static void emitReductionCombiner(CodeGenFunction &CGF, 5428 const Expr *ReductionOp) { 5429 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5430 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5431 if (const auto *DRE = 5432 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5433 if (const auto *DRD = 5434 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5435 std::pair<llvm::Function *, llvm::Function *> Reduction = 5436 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5437 RValue Func = RValue::get(Reduction.first); 5438 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5439 CGF.EmitIgnoredExpr(ReductionOp); 5440 return; 5441 } 5442 CGF.EmitIgnoredExpr(ReductionOp); 5443 } 5444 5445 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5446 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5447 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5448 ArrayRef<const Expr *> ReductionOps) { 5449 ASTContext &C = CGM.getContext(); 5450 5451 // void reduction_func(void *LHSArg, void *RHSArg); 5452 FunctionArgList Args; 5453 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5454 ImplicitParamDecl::Other); 5455 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5456 ImplicitParamDecl::Other); 5457 Args.push_back(&LHSArg); 5458 Args.push_back(&RHSArg); 5459 const auto &CGFI = 5460 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5461 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5462 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5463 llvm::GlobalValue::InternalLinkage, Name, 5464 &CGM.getModule()); 5465 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5466 Fn->setDoesNotRecurse(); 5467 CodeGenFunction CGF(CGM); 5468 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5469 5470 // Dst = (void*[n])(LHSArg); 5471 // Src = (void*[n])(RHSArg); 5472 Address LHS = Address::deprecated( 5473 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5474 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 5475 CGF.getPointerAlign()); 5476 Address RHS = Address::deprecated( 5477 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5478 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 5479 CGF.getPointerAlign()); 5480 5481 // ... 5482 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5483 // ... 5484 CodeGenFunction::OMPPrivateScope Scope(CGF); 5485 const auto *IPriv = Privates.begin(); 5486 unsigned Idx = 0; 5487 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5488 const auto *RHSVar = 5489 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5490 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5491 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5492 }); 5493 const auto *LHSVar = 5494 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5495 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5496 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5497 }); 5498 QualType PrivTy = (*IPriv)->getType(); 5499 if (PrivTy->isVariablyModifiedType()) { 5500 // Get array size and emit VLA type. 
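        // (The element count for a VLA private travels in the next RedList
        //  slot, encoded as a void* by emitReduction(); the load plus
        //  CreatePtrToInt below undoes that encoding — roughly
        //      NumElts = (size_t)lhs[i + 1];)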
5501 ++Idx; 5502 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5503 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5504 const VariableArrayType *VLA = 5505 CGF.getContext().getAsVariableArrayType(PrivTy); 5506 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5507 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5508 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5509 CGF.EmitVariablyModifiedType(PrivTy); 5510 } 5511 } 5512 Scope.Privatize(); 5513 IPriv = Privates.begin(); 5514 const auto *ILHS = LHSExprs.begin(); 5515 const auto *IRHS = RHSExprs.begin(); 5516 for (const Expr *E : ReductionOps) { 5517 if ((*IPriv)->getType()->isArrayType()) { 5518 // Emit reduction for array section. 5519 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5520 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5521 EmitOMPAggregateReduction( 5522 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5523 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5524 emitReductionCombiner(CGF, E); 5525 }); 5526 } else { 5527 // Emit reduction for array subscript or single variable. 5528 emitReductionCombiner(CGF, E); 5529 } 5530 ++IPriv; 5531 ++ILHS; 5532 ++IRHS; 5533 } 5534 Scope.ForceCleanup(); 5535 CGF.FinishFunction(); 5536 return Fn; 5537 } 5538 5539 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5540 const Expr *ReductionOp, 5541 const Expr *PrivateRef, 5542 const DeclRefExpr *LHS, 5543 const DeclRefExpr *RHS) { 5544 if (PrivateRef->getType()->isArrayType()) { 5545 // Emit reduction for array section. 5546 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5547 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5548 EmitOMPAggregateReduction( 5549 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5550 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5551 emitReductionCombiner(CGF, ReductionOp); 5552 }); 5553 } else { 5554 // Emit reduction for array subscript or single variable. 5555 emitReductionCombiner(CGF, ReductionOp); 5556 } 5557 } 5558 5559 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5560 ArrayRef<const Expr *> Privates, 5561 ArrayRef<const Expr *> LHSExprs, 5562 ArrayRef<const Expr *> RHSExprs, 5563 ArrayRef<const Expr *> ReductionOps, 5564 ReductionOptionsTy Options) { 5565 if (!CGF.HaveInsertPoint()) 5566 return; 5567 5568 bool WithNowait = Options.WithNowait; 5569 bool SimpleReduction = Options.SimpleReduction; 5570 5571 // Next code should be emitted for reduction: 5572 // 5573 // static kmp_critical_name lock = { 0 }; 5574 // 5575 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5576 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5577 // ... 5578 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5579 // *(Type<n>-1*)rhs[<n>-1]); 5580 // } 5581 // 5582 // ... 5583 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5584 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5585 // RedList, reduce_func, &<lock>)) { 5586 // case 1: 5587 // ... 5588 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5589 // ... 5590 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5591 // break; 5592 // case 2: 5593 // ... 5594 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5595 // ... 
5596 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5597 // break; 5598 // default:; 5599 // } 5600 // 5601 // if SimpleReduction is true, only the next code is generated: 5602 // ... 5603 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5604 // ... 5605 5606 ASTContext &C = CGM.getContext(); 5607 5608 if (SimpleReduction) { 5609 CodeGenFunction::RunCleanupsScope Scope(CGF); 5610 const auto *IPriv = Privates.begin(); 5611 const auto *ILHS = LHSExprs.begin(); 5612 const auto *IRHS = RHSExprs.begin(); 5613 for (const Expr *E : ReductionOps) { 5614 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5615 cast<DeclRefExpr>(*IRHS)); 5616 ++IPriv; 5617 ++ILHS; 5618 ++IRHS; 5619 } 5620 return; 5621 } 5622 5623 // 1. Build a list of reduction variables. 5624 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5625 auto Size = RHSExprs.size(); 5626 for (const Expr *E : Privates) { 5627 if (E->getType()->isVariablyModifiedType()) 5628 // Reserve place for array size. 5629 ++Size; 5630 } 5631 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5632 QualType ReductionArrayTy = 5633 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5634 /*IndexTypeQuals=*/0); 5635 Address ReductionList = 5636 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5637 const auto *IPriv = Privates.begin(); 5638 unsigned Idx = 0; 5639 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5640 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5641 CGF.Builder.CreateStore( 5642 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5643 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5644 Elem); 5645 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5646 // Store array size. 5647 ++Idx; 5648 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5649 llvm::Value *Size = CGF.Builder.CreateIntCast( 5650 CGF.getVLASize( 5651 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5652 .NumElts, 5653 CGF.SizeTy, /*isSigned=*/false); 5654 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5655 Elem); 5656 } 5657 } 5658 5659 // 2. Emit reduce_func(). 5660 llvm::Function *ReductionFn = emitReductionFunction( 5661 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5662 LHSExprs, RHSExprs, ReductionOps); 5663 5664 // 3. Create static kmp_critical_name lock = { 0 }; 5665 std::string Name = getName({"reduction"}); 5666 llvm::Value *Lock = getCriticalRegionLock(Name); 5667 5668 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5669 // RedList, reduce_func, &<lock>); 5670 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5671 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5672 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5673 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5674 ReductionList.getPointer(), CGF.VoidPtrTy); 5675 llvm::Value *Args[] = { 5676 IdentTLoc, // ident_t *<loc> 5677 ThreadId, // i32 <gtid> 5678 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5679 ReductionArrayTySize, // size_type sizeof(RedList) 5680 RL, // void *RedList 5681 ReductionFn, // void (*) (void *, void *) <reduce_func> 5682 Lock // kmp_critical_name *&<lock> 5683 }; 5684 llvm::Value *Res = CGF.EmitRuntimeCall( 5685 OMPBuilder.getOrCreateRuntimeFunction( 5686 CGM.getModule(), 5687 WithNowait ? 
OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5688 Args); 5689 5690 // 5. Build switch(res) 5691 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5692 llvm::SwitchInst *SwInst = 5693 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5694 5695 // 6. Build case 1: 5696 // ... 5697 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5698 // ... 5699 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5700 // break; 5701 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5702 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5703 CGF.EmitBlock(Case1BB); 5704 5705 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5706 llvm::Value *EndArgs[] = { 5707 IdentTLoc, // ident_t *<loc> 5708 ThreadId, // i32 <gtid> 5709 Lock // kmp_critical_name *&<lock> 5710 }; 5711 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5712 CodeGenFunction &CGF, PrePostActionTy &Action) { 5713 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5714 const auto *IPriv = Privates.begin(); 5715 const auto *ILHS = LHSExprs.begin(); 5716 const auto *IRHS = RHSExprs.begin(); 5717 for (const Expr *E : ReductionOps) { 5718 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5719 cast<DeclRefExpr>(*IRHS)); 5720 ++IPriv; 5721 ++ILHS; 5722 ++IRHS; 5723 } 5724 }; 5725 RegionCodeGenTy RCG(CodeGen); 5726 CommonActionTy Action( 5727 nullptr, llvm::None, 5728 OMPBuilder.getOrCreateRuntimeFunction( 5729 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5730 : OMPRTL___kmpc_end_reduce), 5731 EndArgs); 5732 RCG.setAction(Action); 5733 RCG(CGF); 5734 5735 CGF.EmitBranch(DefaultBB); 5736 5737 // 7. Build case 2: 5738 // ... 5739 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5740 // ... 5741 // break; 5742 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5743 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5744 CGF.EmitBlock(Case2BB); 5745 5746 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5747 CodeGenFunction &CGF, PrePostActionTy &Action) { 5748 const auto *ILHS = LHSExprs.begin(); 5749 const auto *IRHS = RHSExprs.begin(); 5750 const auto *IPriv = Privates.begin(); 5751 for (const Expr *E : ReductionOps) { 5752 const Expr *XExpr = nullptr; 5753 const Expr *EExpr = nullptr; 5754 const Expr *UpExpr = nullptr; 5755 BinaryOperatorKind BO = BO_Comma; 5756 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5757 if (BO->getOpcode() == BO_Assign) { 5758 XExpr = BO->getLHS(); 5759 UpExpr = BO->getRHS(); 5760 } 5761 } 5762 // Try to emit update expression as a simple atomic. 5763 const Expr *RHSExpr = UpExpr; 5764 if (RHSExpr) { 5765 // Analyze RHS part of the whole expression. 5766 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5767 RHSExpr->IgnoreParenImpCasts())) { 5768 // If this is a conditional operator, analyze its condition for 5769 // min/max reduction operator. 
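          // Illustrative sketch (hypothetical user-level code): for a clause
          // like 'reduction(min : x)' the update expression typically has the
          // conditional form
          //   x = x < x_priv ? x : x_priv;
          // so the comparison in the condition carries the opcode needed for
          // the atomic min/max lowering below.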
5770 RHSExpr = ACO->getCond(); 5771 } 5772 if (const auto *BORHS = 5773 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5774 EExpr = BORHS->getRHS(); 5775 BO = BORHS->getOpcode(); 5776 } 5777 } 5778 if (XExpr) { 5779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5780 auto &&AtomicRedGen = [BO, VD, 5781 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5782 const Expr *EExpr, const Expr *UpExpr) { 5783 LValue X = CGF.EmitLValue(XExpr); 5784 RValue E; 5785 if (EExpr) 5786 E = CGF.EmitAnyExpr(EExpr); 5787 CGF.EmitOMPAtomicSimpleUpdateExpr( 5788 X, E, BO, /*IsXLHSInRHSPart=*/true, 5789 llvm::AtomicOrdering::Monotonic, Loc, 5790 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5791 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5792 PrivateScope.addPrivate( 5793 VD, [&CGF, VD, XRValue, Loc]() { 5794 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5795 CGF.emitOMPSimpleStore( 5796 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5797 VD->getType().getNonReferenceType(), Loc); 5798 return LHSTemp; 5799 }); 5800 (void)PrivateScope.Privatize(); 5801 return CGF.EmitAnyExpr(UpExpr); 5802 }); 5803 }; 5804 if ((*IPriv)->getType()->isArrayType()) { 5805 // Emit atomic reduction for array section. 5806 const auto *RHSVar = 5807 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5808 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5809 AtomicRedGen, XExpr, EExpr, UpExpr); 5810 } else { 5811 // Emit atomic reduction for array subscript or single variable. 5812 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5813 } 5814 } else { 5815 // Emit as a critical region. 5816 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5817 const Expr *, const Expr *) { 5818 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5819 std::string Name = RT.getName({"atomic_reduction"}); 5820 RT.emitCriticalRegion( 5821 CGF, Name, 5822 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5823 Action.Enter(CGF); 5824 emitReductionCombiner(CGF, E); 5825 }, 5826 Loc); 5827 }; 5828 if ((*IPriv)->getType()->isArrayType()) { 5829 const auto *LHSVar = 5830 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5831 const auto *RHSVar = 5832 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5833 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5834 CritRedGen); 5835 } else { 5836 CritRedGen(CGF, nullptr, nullptr, nullptr); 5837 } 5838 } 5839 ++ILHS; 5840 ++IRHS; 5841 ++IPriv; 5842 } 5843 }; 5844 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5845 if (!WithNowait) { 5846 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5847 llvm::Value *EndArgs[] = { 5848 IdentTLoc, // ident_t *<loc> 5849 ThreadId, // i32 <gtid> 5850 Lock // kmp_critical_name *&<lock> 5851 }; 5852 CommonActionTy Action(nullptr, llvm::None, 5853 OMPBuilder.getOrCreateRuntimeFunction( 5854 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5855 EndArgs); 5856 AtomicRCG.setAction(Action); 5857 AtomicRCG(CGF); 5858 } else { 5859 AtomicRCG(CGF); 5860 } 5861 5862 CGF.EmitBranch(DefaultBB); 5863 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5864 } 5865 5866 /// Generates unique name for artificial threadprivate variables. 5867 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5868 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5869 const Expr *Ref) { 5870 SmallString<256> Buffer; 5871 llvm::raw_svector_ostream Out(Buffer); 5872 const clang::DeclRefExpr *DE; 5873 const VarDecl *D = ::getBaseDecl(Ref, DE); 5874 if (!D) 5875 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5876 D = D->getCanonicalDecl(); 5877 std::string Name = CGM.getOpenMPRuntime().getName( 5878 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5879 Out << Prefix << Name << "_" 5880 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5881 return std::string(Out.str()); 5882 } 5883 5884 /// Emits reduction initializer function: 5885 /// \code 5886 /// void @.red_init(void* %arg, void* %orig) { 5887 /// %0 = bitcast void* %arg to <type>* 5888 /// store <type> <init>, <type>* %0 5889 /// ret void 5890 /// } 5891 /// \endcode 5892 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5893 SourceLocation Loc, 5894 ReductionCodeGen &RCG, unsigned N) { 5895 ASTContext &C = CGM.getContext(); 5896 QualType VoidPtrTy = C.VoidPtrTy; 5897 VoidPtrTy.addRestrict(); 5898 FunctionArgList Args; 5899 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5900 ImplicitParamDecl::Other); 5901 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5902 ImplicitParamDecl::Other); 5903 Args.emplace_back(&Param); 5904 Args.emplace_back(&ParamOrig); 5905 const auto &FnInfo = 5906 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5907 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5908 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5909 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5910 Name, &CGM.getModule()); 5911 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5912 Fn->setDoesNotRecurse(); 5913 CodeGenFunction CGF(CGM); 5914 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5915 Address PrivateAddr = CGF.EmitLoadOfPointer( 5916 CGF.GetAddrOfLocalVar(&Param), 5917 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5918 llvm::Value *Size = nullptr; 5919 // If the size of the reduction item is non-constant, load it from global 5920 // threadprivate variable. 
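  // Handshake sketch (variable naming is illustrative): for a VLA reduction
  // item, emitTaskReductionFixups stores the dynamic size into an artificial
  // threadprivate global, roughly
  //   .reduction_size.<mangled>_<loc> = <dynamic size>;  // producer side
  // and the initializer emitted here reloads it:
  //   size_t Size = .reduction_size.<mangled>_<loc>;     // consumer side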
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
5996 // %lhs = bitcast void* %arg0 to <type>* 5997 // %rhs = bitcast void* %arg1 to <type>* 5998 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5999 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6000 // Pull out the pointer to the variable. 6001 Address PtrAddr = CGF.EmitLoadOfPointer( 6002 CGF.GetAddrOfLocalVar(&ParamInOut), 6003 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6004 return CGF.Builder.CreateElementBitCast( 6005 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6006 }); 6007 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6008 // Pull out the pointer to the variable. 6009 Address PtrAddr = CGF.EmitLoadOfPointer( 6010 CGF.GetAddrOfLocalVar(&ParamIn), 6011 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6012 return CGF.Builder.CreateElementBitCast( 6013 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6014 }); 6015 PrivateScope.Privatize(); 6016 // Emit the combiner body: 6017 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6018 // store <type> %2, <type>* %lhs 6019 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6020 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6021 cast<DeclRefExpr>(RHS)); 6022 CGF.FinishFunction(); 6023 return Fn; 6024 } 6025 6026 /// Emits reduction finalizer function: 6027 /// \code 6028 /// void @.red_fini(void* %arg) { 6029 /// %0 = bitcast void* %arg to <type>* 6030 /// <destroy>(<type>* %0) 6031 /// ret void 6032 /// } 6033 /// \endcode 6034 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6035 SourceLocation Loc, 6036 ReductionCodeGen &RCG, unsigned N) { 6037 if (!RCG.needCleanups(N)) 6038 return nullptr; 6039 ASTContext &C = CGM.getContext(); 6040 FunctionArgList Args; 6041 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6042 ImplicitParamDecl::Other); 6043 Args.emplace_back(&Param); 6044 const auto &FnInfo = 6045 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6046 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6047 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6048 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6049 Name, &CGM.getModule()); 6050 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6051 Fn->setDoesNotRecurse(); 6052 CodeGenFunction CGF(CGM); 6053 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6054 Address PrivateAddr = CGF.EmitLoadOfPointer( 6055 CGF.GetAddrOfLocalVar(&Param), 6056 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6057 llvm::Value *Size = nullptr; 6058 // If the size of the reduction item is non-constant, load it from global 6059 // threadprivate variable. 
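  // The finalizer needs the dynamic size for the same reason the initializer
  // and combiner do: for a VLA item the emitted cleanup has to visit every
  // element, conceptually (sketch only):
  //   for (size_t I = 0; I != NumElts; ++I)
  //     Elts[I].~T();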
6060 if (RCG.getSizes(N).second) { 6061 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6062 CGF, CGM.getContext().getSizeType(), 6063 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6064 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6065 CGM.getContext().getSizeType(), Loc); 6066 } 6067 RCG.emitAggregateType(CGF, N, Size); 6068 // Emit the finalizer body: 6069 // <destroy>(<type>* %0) 6070 RCG.emitCleanups(CGF, N, PrivateAddr); 6071 CGF.FinishFunction(Loc); 6072 return Fn; 6073 } 6074 6075 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6076 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6077 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6078 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6079 return nullptr; 6080 6081 // Build typedef struct: 6082 // kmp_taskred_input { 6083 // void *reduce_shar; // shared reduction item 6084 // void *reduce_orig; // original reduction item used for initialization 6085 // size_t reduce_size; // size of data item 6086 // void *reduce_init; // data initialization routine 6087 // void *reduce_fini; // data finalization routine 6088 // void *reduce_comb; // data combiner routine 6089 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6090 // } kmp_taskred_input_t; 6091 ASTContext &C = CGM.getContext(); 6092 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6093 RD->startDefinition(); 6094 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6095 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6096 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6097 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6098 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6099 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6100 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6101 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6102 RD->completeDefinition(); 6103 QualType RDType = C.getRecordType(RD); 6104 unsigned Size = Data.ReductionVars.size(); 6105 llvm::APInt ArraySize(/*numBits=*/64, Size); 6106 QualType ArrayRDType = C.getConstantArrayType( 6107 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6108 // kmp_task_red_input_t .rd_input.[Size]; 6109 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6110 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6111 Data.ReductionCopies, Data.ReductionOps); 6112 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6113 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6114 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6115 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6116 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6117 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6118 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6119 ".rd_input.gep."); 6120 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6121 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6122 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6123 RCG.emitSharedOrigLValue(CGF, Cnt); 6124 llvm::Value *CastedShared = 6125 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6126 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6127 // ElemLVal.reduce_orig = &Origs[Cnt]; 6128 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6129 llvm::Value *CastedOrig = 6130 
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available to those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy,
                               Data.IsWorksharingReduction ?
                                   1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable if the size of the reduction item
  // is non-constant (i.e. Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address::deprecated(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
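    // Sketch of the no-dependence lowering produced through the IR builder:
    // for plain user code such as
    //   #pragma omp taskwait
    // the emitted call is equivalent to
    //   call i32 @__kmpc_omp_taskwait(ident_t* %loc, i32 %gtid)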
6266 OMPBuilder.createTaskwait(CGF.Builder); 6267 } else { 6268 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6269 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6270 auto &M = CGM.getModule(); 6271 Address DependenciesArray = Address::invalid(); 6272 llvm::Value *NumOfElements; 6273 std::tie(NumOfElements, DependenciesArray) = 6274 emitDependClause(CGF, Data.Dependences, Loc); 6275 llvm::Value *DepWaitTaskArgs[6]; 6276 if (!Data.Dependences.empty()) { 6277 DepWaitTaskArgs[0] = UpLoc; 6278 DepWaitTaskArgs[1] = ThreadID; 6279 DepWaitTaskArgs[2] = NumOfElements; 6280 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6281 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6282 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6283 6284 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6285 6286 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6287 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6288 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6289 // is specified. 6290 CGF.EmitRuntimeCall( 6291 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6292 DepWaitTaskArgs); 6293 6294 } else { 6295 6296 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6297 // global_tid); 6298 llvm::Value *Args[] = {UpLoc, ThreadID}; 6299 // Ignore return result until untied tasks are supported. 6300 CGF.EmitRuntimeCall( 6301 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6302 Args); 6303 } 6304 } 6305 6306 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6307 Region->emitUntiedSwitch(CGF); 6308 } 6309 6310 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6311 OpenMPDirectiveKind InnerKind, 6312 const RegionCodeGenTy &CodeGen, 6313 bool HasCancel) { 6314 if (!CGF.HaveInsertPoint()) 6315 return; 6316 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6317 InnerKind != OMPD_critical && 6318 InnerKind != OMPD_master && 6319 InnerKind != OMPD_masked); 6320 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6321 } 6322 6323 namespace { 6324 enum RTCancelKind { 6325 CancelNoreq = 0, 6326 CancelParallel = 1, 6327 CancelLoop = 2, 6328 CancelSections = 3, 6329 CancelTaskgroup = 4 6330 }; 6331 } // anonymous namespace 6332 6333 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6334 RTCancelKind CancelKind = CancelNoreq; 6335 if (CancelRegion == OMPD_parallel) 6336 CancelKind = CancelParallel; 6337 else if (CancelRegion == OMPD_for) 6338 CancelKind = CancelLoop; 6339 else if (CancelRegion == OMPD_sections) 6340 CancelKind = CancelSections; 6341 else { 6342 assert(CancelRegion == OMPD_taskgroup); 6343 CancelKind = CancelTaskgroup; 6344 } 6345 return CancelKind; 6346 } 6347 6348 void CGOpenMPRuntime::emitCancellationPointCall( 6349 CodeGenFunction &CGF, SourceLocation Loc, 6350 OpenMPDirectiveKind CancelRegion) { 6351 if (!CGF.HaveInsertPoint()) 6352 return; 6353 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6354 // global_tid, kmp_int32 cncl_kind); 6355 if (auto *OMPRegionInfo = 6356 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6357 // For 'cancellation point taskgroup', the task region info may not have a 6358 // cancel. This may instead happen in another adjacent task. 
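    // Example (hypothetical user code): in
    //   #pragma omp taskgroup
    //   {
    //     #pragma omp task
    //     { ... #pragma omp cancellation point taskgroup ... }
    //   }
    // the matching 'cancel taskgroup' may live in a sibling task, so the
    // runtime query is emitted even though this region never cancels itself.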
6359 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6360 llvm::Value *Args[] = { 6361 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6362 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6363 // Ignore return result until untied tasks are supported. 6364 llvm::Value *Result = CGF.EmitRuntimeCall( 6365 OMPBuilder.getOrCreateRuntimeFunction( 6366 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6367 Args); 6368 // if (__kmpc_cancellationpoint()) { 6369 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6370 // exit from construct; 6371 // } 6372 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6373 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6374 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6375 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6376 CGF.EmitBlock(ExitBB); 6377 if (CancelRegion == OMPD_parallel) 6378 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6379 // exit from construct; 6380 CodeGenFunction::JumpDest CancelDest = 6381 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6382 CGF.EmitBranchThroughCleanup(CancelDest); 6383 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6384 } 6385 } 6386 } 6387 6388 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6389 const Expr *IfCond, 6390 OpenMPDirectiveKind CancelRegion) { 6391 if (!CGF.HaveInsertPoint()) 6392 return; 6393 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6394 // kmp_int32 cncl_kind); 6395 auto &M = CGM.getModule(); 6396 if (auto *OMPRegionInfo = 6397 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6398 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6399 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6400 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6401 llvm::Value *Args[] = { 6402 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6403 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6404 // Ignore return result until untied tasks are supported. 6405 llvm::Value *Result = CGF.EmitRuntimeCall( 6406 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6407 // if (__kmpc_cancel()) { 6408 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6409 // exit from construct; 6410 // } 6411 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6412 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6413 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6414 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6415 CGF.EmitBlock(ExitBB); 6416 if (CancelRegion == OMPD_parallel) 6417 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6418 // exit from construct; 6419 CodeGenFunction::JumpDest CancelDest = 6420 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6421 CGF.EmitBranchThroughCleanup(CancelDest); 6422 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6423 }; 6424 if (IfCond) { 6425 emitIfClause(CGF, IfCond, ThenGen, 6426 [](CodeGenFunction &, PrePostActionTy &) {}); 6427 } else { 6428 RegionCodeGenTy ThenRCG(ThenGen); 6429 ThenRCG(CGF); 6430 } 6431 } 6432 } 6433 6434 namespace { 6435 /// Cleanup action for uses_allocators support. 
6436 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6437 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6438 6439 public: 6440 OMPUsesAllocatorsActionTy( 6441 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6442 : Allocators(Allocators) {} 6443 void Enter(CodeGenFunction &CGF) override { 6444 if (!CGF.HaveInsertPoint()) 6445 return; 6446 for (const auto &AllocatorData : Allocators) { 6447 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6448 CGF, AllocatorData.first, AllocatorData.second); 6449 } 6450 } 6451 void Exit(CodeGenFunction &CGF) override { 6452 if (!CGF.HaveInsertPoint()) 6453 return; 6454 for (const auto &AllocatorData : Allocators) { 6455 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6456 AllocatorData.first); 6457 } 6458 } 6459 }; 6460 } // namespace 6461 6462 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6463 const OMPExecutableDirective &D, StringRef ParentName, 6464 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6465 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6466 assert(!ParentName.empty() && "Invalid target region parent name!"); 6467 HasEmittedTargetRegion = true; 6468 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6469 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6470 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6471 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6472 if (!D.AllocatorTraits) 6473 continue; 6474 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6475 } 6476 } 6477 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6478 CodeGen.setAction(UsesAllocatorAction); 6479 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6480 IsOffloadEntry, CodeGen); 6481 } 6482 6483 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6484 const Expr *Allocator, 6485 const Expr *AllocatorTraits) { 6486 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6487 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6488 // Use default memspace handle. 6489 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6490 llvm::Value *NumTraits = llvm::ConstantInt::get( 6491 CGF.IntTy, cast<ConstantArrayType>( 6492 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6493 ->getSize() 6494 .getLimitedValue()); 6495 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6496 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6497 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6498 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6499 AllocatorTraitsLVal.getBaseInfo(), 6500 AllocatorTraitsLVal.getTBAAInfo()); 6501 llvm::Value *Traits = 6502 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6503 6504 llvm::Value *AllocatorVal = 6505 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6506 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6507 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6508 // Store to allocator. 
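  // Sketch (allocator and traits names are hypothetical): for
  //   #pragma omp target uses_allocators(my_alloc(my_traits))
  // the code above and below amounts to
  //   omp_allocator_handle_t my_alloc = __kmpc_init_allocator(
  //       gtid, /*memspace=*/nullptr, num_traits, my_traits);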
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outlined function is not an offload entry, we don't need
  // to register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep it alive, and could therefore inline the
  // host function if proven worthwhile during optimization. On the other
  // hand, if emitting code for the device, the ID has to be the function
  // address so that it can be retrieved from the offloading entry and
  // launched by the runtime library.
We also mark the 6583 // outlined function to have external linkage in case we are emitting code for 6584 // the device, because these functions will be entry points to the device. 6585 6586 if (CGM.getLangOpts().OpenMPIsDevice) { 6587 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6588 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6589 OutlinedFn->setDSOLocal(false); 6590 if (CGM.getTriple().isAMDGCN()) 6591 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6592 } else { 6593 std::string Name = getName({EntryFnName, "region_id"}); 6594 OutlinedFnID = new llvm::GlobalVariable( 6595 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6596 llvm::GlobalValue::WeakAnyLinkage, 6597 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6598 } 6599 6600 // If we do not allow host fallback we still need a named address to use. 6601 llvm::Constant *TargetRegionEntryAddr = OutlinedFn; 6602 if (!BuildOutlinedFn) { 6603 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && 6604 "Named kernel already exists?"); 6605 TargetRegionEntryAddr = new llvm::GlobalVariable( 6606 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6607 llvm::GlobalValue::InternalLinkage, 6608 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); 6609 } 6610 6611 // Register the information for the entry associated with this target region. 6612 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6613 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6614 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6615 6616 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6617 int32_t DefaultValTeams = -1; 6618 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6619 if (DefaultValTeams > 0 && OutlinedFn) { 6620 OutlinedFn->addFnAttr("omp_target_num_teams", 6621 std::to_string(DefaultValTeams)); 6622 } 6623 int32_t DefaultValThreads = -1; 6624 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6625 if (DefaultValThreads > 0 && OutlinedFn) { 6626 OutlinedFn->addFnAttr("omp_target_thread_limit", 6627 std::to_string(DefaultValThreads)); 6628 } 6629 6630 if (BuildOutlinedFn) 6631 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6632 } 6633 6634 /// Checks if the expression is constant or does not have non-trivial function 6635 /// calls. 6636 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6637 // We can skip constant expressions. 6638 // We can skip expressions with trivial calls or simple expressions. 6639 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6640 !E->hasNonTrivialCall(Ctx)) && 6641 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6642 } 6643 6644 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6645 const Stmt *Body) { 6646 const Stmt *Child = Body->IgnoreContainers(); 6647 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6648 Child = nullptr; 6649 for (const Stmt *S : C->body()) { 6650 if (const auto *E = dyn_cast<Expr>(S)) { 6651 if (isTrivial(Ctx, E)) 6652 continue; 6653 } 6654 // Some of the statements can be ignored. 6655 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6656 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6657 continue; 6658 // Analyze declarations. 
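      // Example (illustrative): in a captured body like
      //   { int Unused; #pragma omp teams ... }
      // the never-used local declaration is skipped below, so the teams
      // directive is still recognized as the single meaningful child.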
6659 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6660 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6661 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6662 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6663 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6664 isa<UsingDirectiveDecl>(D) || 6665 isa<OMPDeclareReductionDecl>(D) || 6666 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6667 return true; 6668 const auto *VD = dyn_cast<VarDecl>(D); 6669 if (!VD) 6670 return false; 6671 return VD->hasGlobalStorage() || !VD->isUsed(); 6672 })) 6673 continue; 6674 } 6675 // Found multiple children - cannot get the one child only. 6676 if (Child) 6677 return nullptr; 6678 Child = S; 6679 } 6680 if (Child) 6681 Child = Child->IgnoreContainers(); 6682 } 6683 return Child; 6684 } 6685 6686 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6687 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6688 int32_t &DefaultVal) { 6689 6690 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6691 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6692 "Expected target-based executable directive."); 6693 switch (DirectiveKind) { 6694 case OMPD_target: { 6695 const auto *CS = D.getInnermostCapturedStmt(); 6696 const auto *Body = 6697 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6698 const Stmt *ChildStmt = 6699 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6700 if (const auto *NestedDir = 6701 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6702 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6703 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6704 const Expr *NumTeams = 6705 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6706 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6707 if (auto Constant = 6708 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6709 DefaultVal = Constant->getExtValue(); 6710 return NumTeams; 6711 } 6712 DefaultVal = 0; 6713 return nullptr; 6714 } 6715 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6716 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6717 DefaultVal = 1; 6718 return nullptr; 6719 } 6720 DefaultVal = 1; 6721 return nullptr; 6722 } 6723 // A value of -1 is used to check if we need to emit no teams region 6724 DefaultVal = -1; 6725 return nullptr; 6726 } 6727 case OMPD_target_teams: 6728 case OMPD_target_teams_distribute: 6729 case OMPD_target_teams_distribute_simd: 6730 case OMPD_target_teams_distribute_parallel_for: 6731 case OMPD_target_teams_distribute_parallel_for_simd: { 6732 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6733 const Expr *NumTeams = 6734 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6735 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6736 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6737 DefaultVal = Constant->getExtValue(); 6738 return NumTeams; 6739 } 6740 DefaultVal = 0; 6741 return nullptr; 6742 } 6743 case OMPD_target_parallel: 6744 case OMPD_target_parallel_for: 6745 case OMPD_target_parallel_for_simd: 6746 case OMPD_target_simd: 6747 DefaultVal = 1; 6748 return nullptr; 6749 case OMPD_parallel: 6750 case OMPD_for: 6751 case OMPD_parallel_for: 6752 case OMPD_parallel_master: 6753 case OMPD_parallel_sections: 6754 case OMPD_for_simd: 6755 case OMPD_parallel_for_simd: 6756 case OMPD_cancel: 6757 case OMPD_cancellation_point: 6758 case OMPD_ordered: 6759 case OMPD_threadprivate: 6760 case OMPD_allocate: 6761 case OMPD_task: 6762 case 
OMPD_simd: 6763 case OMPD_tile: 6764 case OMPD_unroll: 6765 case OMPD_sections: 6766 case OMPD_section: 6767 case OMPD_single: 6768 case OMPD_master: 6769 case OMPD_critical: 6770 case OMPD_taskyield: 6771 case OMPD_barrier: 6772 case OMPD_taskwait: 6773 case OMPD_taskgroup: 6774 case OMPD_atomic: 6775 case OMPD_flush: 6776 case OMPD_depobj: 6777 case OMPD_scan: 6778 case OMPD_teams: 6779 case OMPD_target_data: 6780 case OMPD_target_exit_data: 6781 case OMPD_target_enter_data: 6782 case OMPD_distribute: 6783 case OMPD_distribute_simd: 6784 case OMPD_distribute_parallel_for: 6785 case OMPD_distribute_parallel_for_simd: 6786 case OMPD_teams_distribute: 6787 case OMPD_teams_distribute_simd: 6788 case OMPD_teams_distribute_parallel_for: 6789 case OMPD_teams_distribute_parallel_for_simd: 6790 case OMPD_target_update: 6791 case OMPD_declare_simd: 6792 case OMPD_declare_variant: 6793 case OMPD_begin_declare_variant: 6794 case OMPD_end_declare_variant: 6795 case OMPD_declare_target: 6796 case OMPD_end_declare_target: 6797 case OMPD_declare_reduction: 6798 case OMPD_declare_mapper: 6799 case OMPD_taskloop: 6800 case OMPD_taskloop_simd: 6801 case OMPD_master_taskloop: 6802 case OMPD_master_taskloop_simd: 6803 case OMPD_parallel_master_taskloop: 6804 case OMPD_parallel_master_taskloop_simd: 6805 case OMPD_requires: 6806 case OMPD_metadirective: 6807 case OMPD_unknown: 6808 break; 6809 default: 6810 break; 6811 } 6812 llvm_unreachable("Unexpected directive kind."); 6813 } 6814 6815 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6816 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6817 assert(!CGF.getLangOpts().OpenMPIsDevice && 6818 "Clauses associated with the teams directive expected to be emitted " 6819 "only for the host!"); 6820 CGBuilderTy &Bld = CGF.Builder; 6821 int32_t DefaultNT = -1; 6822 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6823 if (NumTeams != nullptr) { 6824 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6825 6826 switch (DirectiveKind) { 6827 case OMPD_target: { 6828 const auto *CS = D.getInnermostCapturedStmt(); 6829 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6830 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6831 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6832 /*IgnoreResultAssign*/ true); 6833 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6834 /*isSigned=*/true); 6835 } 6836 case OMPD_target_teams: 6837 case OMPD_target_teams_distribute: 6838 case OMPD_target_teams_distribute_simd: 6839 case OMPD_target_teams_distribute_parallel_for: 6840 case OMPD_target_teams_distribute_parallel_for_simd: { 6841 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6842 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6843 /*IgnoreResultAssign*/ true); 6844 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6845 /*isSigned=*/true); 6846 } 6847 default: 6848 break; 6849 } 6850 } else if (DefaultNT == -1) { 6851 return nullptr; 6852 } 6853 6854 return Bld.getInt32(DefaultNT); 6855 } 6856 6857 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6858 llvm::Value *DefaultThreadLimitVal) { 6859 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6860 CGF.getContext(), CS->getCapturedStmt()); 6861 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6862 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6863 llvm::Value *NumThreads = nullptr; 6864 llvm::Value *CondVal = nullptr; 6865 // Handle if clause. 
If the if clause is present, the number of
      // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause only if the if clause was
      // not specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process the condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
DefaultThreadLimitVal 6946 : CGF.Builder.getInt32(0); 6947 } 6948 6949 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6950 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6951 int32_t &DefaultVal) { 6952 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6953 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6954 "Expected target-based executable directive."); 6955 6956 switch (DirectiveKind) { 6957 case OMPD_target: 6958 // Teams have no clause thread_limit 6959 return nullptr; 6960 case OMPD_target_teams: 6961 case OMPD_target_teams_distribute: 6962 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6963 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6964 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6965 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6966 if (auto Constant = 6967 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6968 DefaultVal = Constant->getExtValue(); 6969 return ThreadLimit; 6970 } 6971 return nullptr; 6972 case OMPD_target_parallel: 6973 case OMPD_target_parallel_for: 6974 case OMPD_target_parallel_for_simd: 6975 case OMPD_target_teams_distribute_parallel_for: 6976 case OMPD_target_teams_distribute_parallel_for_simd: { 6977 Expr *ThreadLimit = nullptr; 6978 Expr *NumThreads = nullptr; 6979 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6980 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6981 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6982 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6983 if (auto Constant = 6984 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6985 DefaultVal = Constant->getExtValue(); 6986 } 6987 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6988 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6989 NumThreads = NumThreadsClause->getNumThreads(); 6990 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6991 if (auto Constant = 6992 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6993 if (Constant->getExtValue() < DefaultVal) { 6994 DefaultVal = Constant->getExtValue(); 6995 ThreadLimit = NumThreads; 6996 } 6997 } 6998 } 6999 } 7000 return ThreadLimit; 7001 } 7002 case OMPD_target_teams_distribute_simd: 7003 case OMPD_target_simd: 7004 DefaultVal = 1; 7005 return nullptr; 7006 case OMPD_parallel: 7007 case OMPD_for: 7008 case OMPD_parallel_for: 7009 case OMPD_parallel_master: 7010 case OMPD_parallel_sections: 7011 case OMPD_for_simd: 7012 case OMPD_parallel_for_simd: 7013 case OMPD_cancel: 7014 case OMPD_cancellation_point: 7015 case OMPD_ordered: 7016 case OMPD_threadprivate: 7017 case OMPD_allocate: 7018 case OMPD_task: 7019 case OMPD_simd: 7020 case OMPD_tile: 7021 case OMPD_unroll: 7022 case OMPD_sections: 7023 case OMPD_section: 7024 case OMPD_single: 7025 case OMPD_master: 7026 case OMPD_critical: 7027 case OMPD_taskyield: 7028 case OMPD_barrier: 7029 case OMPD_taskwait: 7030 case OMPD_taskgroup: 7031 case OMPD_atomic: 7032 case OMPD_flush: 7033 case OMPD_depobj: 7034 case OMPD_scan: 7035 case OMPD_teams: 7036 case OMPD_target_data: 7037 case OMPD_target_exit_data: 7038 case OMPD_target_enter_data: 7039 case OMPD_distribute: 7040 case OMPD_distribute_simd: 7041 case OMPD_distribute_parallel_for: 7042 case OMPD_distribute_parallel_for_simd: 7043 case OMPD_teams_distribute: 7044 case OMPD_teams_distribute_simd: 7045 case OMPD_teams_distribute_parallel_for: 7046 case OMPD_teams_distribute_parallel_for_simd: 7047 case OMPD_target_update: 7048 case 
OMPD_declare_simd: 7049 case OMPD_declare_variant: 7050 case OMPD_begin_declare_variant: 7051 case OMPD_end_declare_variant: 7052 case OMPD_declare_target: 7053 case OMPD_end_declare_target: 7054 case OMPD_declare_reduction: 7055 case OMPD_declare_mapper: 7056 case OMPD_taskloop: 7057 case OMPD_taskloop_simd: 7058 case OMPD_master_taskloop: 7059 case OMPD_master_taskloop_simd: 7060 case OMPD_parallel_master_taskloop: 7061 case OMPD_parallel_master_taskloop_simd: 7062 case OMPD_requires: 7063 case OMPD_unknown: 7064 break; 7065 default: 7066 break; 7067 } 7068 llvm_unreachable("Unsupported directive kind."); 7069 } 7070 7071 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7072 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7073 assert(!CGF.getLangOpts().OpenMPIsDevice && 7074 "Clauses associated with the teams directive expected to be emitted " 7075 "only for the host!"); 7076 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7077 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7078 "Expected target-based executable directive."); 7079 CGBuilderTy &Bld = CGF.Builder; 7080 llvm::Value *ThreadLimitVal = nullptr; 7081 llvm::Value *NumThreadsVal = nullptr; 7082 switch (DirectiveKind) { 7083 case OMPD_target: { 7084 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7085 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7086 return NumThreads; 7087 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7088 CGF.getContext(), CS->getCapturedStmt()); 7089 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7090 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7091 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7092 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7093 const auto *ThreadLimitClause = 7094 Dir->getSingleClause<OMPThreadLimitClause>(); 7095 CodeGenFunction::LexicalScope Scope( 7096 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7097 if (const auto *PreInit = 7098 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7099 for (const auto *I : PreInit->decls()) { 7100 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7101 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7102 } else { 7103 CodeGenFunction::AutoVarEmission Emission = 7104 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7105 CGF.EmitAutoVarCleanups(Emission); 7106 } 7107 } 7108 } 7109 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7110 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7111 ThreadLimitVal = 7112 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7113 } 7114 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7115 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7116 CS = Dir->getInnermostCapturedStmt(); 7117 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7118 CGF.getContext(), CS->getCapturedStmt()); 7119 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7120 } 7121 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7122 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7123 CS = Dir->getInnermostCapturedStmt(); 7124 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7125 return NumThreads; 7126 } 7127 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7128 return Bld.getInt32(1); 7129 } 7130 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If the if clause is present, the number of
    // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
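    // For example (an illustrative sketch, not a directive taken from a
    // test): '#pragma omp target parallel if(c) num_threads(n)
    // thread_limit(l)' is emitted below as select(c, min(n, l), 1), and a
    // final value of 0 tells the runtime that no limit was specified.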
7173 if (D.hasClausesOfKind<OMPIfClause>()) { 7174 const OMPIfClause *IfClause = nullptr; 7175 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7176 if (C->getNameModifier() == OMPD_unknown || 7177 C->getNameModifier() == OMPD_parallel) { 7178 IfClause = C; 7179 break; 7180 } 7181 } 7182 if (IfClause) { 7183 const Expr *Cond = IfClause->getCondition(); 7184 bool Result; 7185 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7186 if (!Result) 7187 return Bld.getInt32(1); 7188 } else { 7189 CodeGenFunction::RunCleanupsScope Scope(CGF); 7190 CondVal = CGF.EvaluateExprAsBool(Cond); 7191 } 7192 } 7193 } 7194 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7195 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7196 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7197 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7198 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7199 ThreadLimitVal = 7200 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7201 } 7202 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7203 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7204 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7205 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7206 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7207 NumThreadsVal = 7208 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7209 ThreadLimitVal = ThreadLimitVal 7210 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7211 ThreadLimitVal), 7212 NumThreadsVal, ThreadLimitVal) 7213 : NumThreadsVal; 7214 } 7215 if (!ThreadLimitVal) 7216 ThreadLimitVal = Bld.getInt32(0); 7217 if (CondVal) 7218 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7219 return ThreadLimitVal; 7220 } 7221 case OMPD_target_teams_distribute_simd: 7222 case OMPD_target_simd: 7223 return Bld.getInt32(1); 7224 case OMPD_parallel: 7225 case OMPD_for: 7226 case OMPD_parallel_for: 7227 case OMPD_parallel_master: 7228 case OMPD_parallel_sections: 7229 case OMPD_for_simd: 7230 case OMPD_parallel_for_simd: 7231 case OMPD_cancel: 7232 case OMPD_cancellation_point: 7233 case OMPD_ordered: 7234 case OMPD_threadprivate: 7235 case OMPD_allocate: 7236 case OMPD_task: 7237 case OMPD_simd: 7238 case OMPD_tile: 7239 case OMPD_unroll: 7240 case OMPD_sections: 7241 case OMPD_section: 7242 case OMPD_single: 7243 case OMPD_master: 7244 case OMPD_critical: 7245 case OMPD_taskyield: 7246 case OMPD_barrier: 7247 case OMPD_taskwait: 7248 case OMPD_taskgroup: 7249 case OMPD_atomic: 7250 case OMPD_flush: 7251 case OMPD_depobj: 7252 case OMPD_scan: 7253 case OMPD_teams: 7254 case OMPD_target_data: 7255 case OMPD_target_exit_data: 7256 case OMPD_target_enter_data: 7257 case OMPD_distribute: 7258 case OMPD_distribute_simd: 7259 case OMPD_distribute_parallel_for: 7260 case OMPD_distribute_parallel_for_simd: 7261 case OMPD_teams_distribute: 7262 case OMPD_teams_distribute_simd: 7263 case OMPD_teams_distribute_parallel_for: 7264 case OMPD_teams_distribute_parallel_for_simd: 7265 case OMPD_target_update: 7266 case OMPD_declare_simd: 7267 case OMPD_declare_variant: 7268 case OMPD_begin_declare_variant: 7269 case OMPD_end_declare_variant: 7270 case OMPD_declare_target: 7271 case OMPD_end_declare_target: 7272 case OMPD_declare_reduction: 7273 case OMPD_declare_mapper: 7274 case OMPD_taskloop: 7275 case OMPD_taskloop_simd: 7276 case OMPD_master_taskloop: 7277 case OMPD_master_taskloop_simd: 7278 case OMPD_parallel_master_taskloop: 7279 
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
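  /// For illustration (derived from the values above, not a separate API):
  /// OMP_MAP_MEMBER_OF = 0xffff000000000000 has its lowest set bit at
  /// position 48, so getFlagMemberOffset() returns 48, and the Nth map
  /// argument is tagged as MEMBER_OF(N) by storing N in those 16 MSBs, e.g.
  /// getMemberOfFlag(/*Position=*/0) yields MEMBER_OF(1) == 1ULL << 48.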
7356 static unsigned getFlagMemberOffset() { 7357 unsigned Offset = 0; 7358 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7359 Remain = Remain >> 1) 7360 Offset++; 7361 return Offset; 7362 } 7363 7364 /// Class that holds debugging information for a data mapping to be passed to 7365 /// the runtime library. 7366 class MappingExprInfo { 7367 /// The variable declaration used for the data mapping. 7368 const ValueDecl *MapDecl = nullptr; 7369 /// The original expression used in the map clause, or null if there is 7370 /// none. 7371 const Expr *MapExpr = nullptr; 7372 7373 public: 7374 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7375 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7376 7377 const ValueDecl *getMapDecl() const { return MapDecl; } 7378 const Expr *getMapExpr() const { return MapExpr; } 7379 }; 7380 7381 /// Class that associates information with a base pointer to be passed to the 7382 /// runtime library. 7383 class BasePointerInfo { 7384 /// The base pointer. 7385 llvm::Value *Ptr = nullptr; 7386 /// The base declaration that refers to this device pointer, or null if 7387 /// there is none. 7388 const ValueDecl *DevPtrDecl = nullptr; 7389 7390 public: 7391 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7392 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7393 llvm::Value *operator*() const { return Ptr; } 7394 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7395 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7396 }; 7397 7398 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7399 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7400 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7401 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7402 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7403 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7404 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7405 7406 /// This structure contains combined information generated for mappable 7407 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7408 /// mappers, and non-contiguous information. 7409 struct MapCombinedInfoTy { 7410 struct StructNonContiguousInfo { 7411 bool IsNonContiguous = false; 7412 MapDimArrayTy Dims; 7413 MapNonContiguousArrayTy Offsets; 7414 MapNonContiguousArrayTy Counts; 7415 MapNonContiguousArrayTy Strides; 7416 }; 7417 MapExprsArrayTy Exprs; 7418 MapBaseValuesArrayTy BasePointers; 7419 MapValuesArrayTy Pointers; 7420 MapValuesArrayTy Sizes; 7421 MapFlagsArrayTy Types; 7422 MapMappersArrayTy Mappers; 7423 StructNonContiguousInfo NonContigInfo; 7424 7425 /// Append arrays in \a CurInfo. 
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Holds the information gathered for a single component list of a
  /// mappable clause: the components themselves, the map type and modifiers,
  /// and whether a device pointer has to be returned for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7504 llvm::PointerUnion<const OMPExecutableDirective *, 7505 const OMPDeclareMapperDecl *> 7506 CurDir; 7507 7508 /// Function the directive is being generated for. 7509 CodeGenFunction &CGF; 7510 7511 /// Set of all first private variables in the current directive. 7512 /// bool data is set to true if the variable is implicitly marked as 7513 /// firstprivate, false otherwise. 7514 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7515 7516 /// Map between device pointer declarations and their expression components. 7517 /// The key value for declarations in 'this' is null. 7518 llvm::DenseMap< 7519 const ValueDecl *, 7520 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7521 DevPointersMap; 7522 7523 /// Map between lambda declarations and their map type. 7524 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7525 7526 llvm::Value *getExprTypeSize(const Expr *E) const { 7527 QualType ExprTy = E->getType().getCanonicalType(); 7528 7529 // Calculate the size for array shaping expression. 7530 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7531 llvm::Value *Size = 7532 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7533 for (const Expr *SE : OAE->getDimensions()) { 7534 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7535 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7536 CGF.getContext().getSizeType(), 7537 SE->getExprLoc()); 7538 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7539 } 7540 return Size; 7541 } 7542 7543 // Reference types are ignored for mapping purposes. 7544 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7545 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7546 7547 // Given that an array section is considered a built-in type, we need to 7548 // do the calculation based on the length of the section instead of relying 7549 // on CGF.getTypeSize(E->getType()). 7550 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7551 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7552 OAE->getBase()->IgnoreParenImpCasts()) 7553 .getCanonicalType(); 7554 7555 // If there is no length associated with the expression and lower bound is 7556 // not specified too, that means we are using the whole length of the 7557 // base. 7558 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7559 !OAE->getLowerBound()) 7560 return CGF.getTypeSize(BaseTy); 7561 7562 llvm::Value *ElemSize; 7563 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7564 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7565 } else { 7566 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7567 assert(ATy && "Expecting array type if not a pointer type."); 7568 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7569 } 7570 7571 // If we don't have a length at this point, that is because we have an 7572 // array section with a single element. 
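      // (For instance, the trailing subscript in 'arr[0:2][2]': Clang still
      // models it as a section expression, but with no colon it designates a
      // single element; see the non-contiguous handling further below.)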
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = size of the whole array - lb * size of a single element.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two map types.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
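  /// For example (illustrative): 'a[3:1]' has a constant length of one and
  /// is not final; 'a[3:n]' is final because its length cannot be proved to
  /// be one; and 'a[:]' over 'int a[1]' is not final since that dimension is
  /// known to be 1.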
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
7710 // 7711 // double d; 7712 // int i[100]; 7713 // float *p; 7714 // 7715 // struct S1 { 7716 // int i; 7717 // float f[50]; 7718 // } 7719 // struct S2 { 7720 // int i; 7721 // float f[50]; 7722 // S1 s; 7723 // double *p; 7724 // struct S2 *ps; 7725 // int &ref; 7726 // } 7727 // S2 s; 7728 // S2 *ps; 7729 // 7730 // map(d) 7731 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7732 // 7733 // map(i) 7734 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7735 // 7736 // map(i[1:23]) 7737 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7738 // 7739 // map(p) 7740 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7741 // 7742 // map(p[1:24]) 7743 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7744 // in unified shared memory mode or for local pointers 7745 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7746 // 7747 // map(s) 7748 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7749 // 7750 // map(s.i) 7751 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7752 // 7753 // map(s.s.f) 7754 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7755 // 7756 // map(s.p) 7757 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7758 // 7759 // map(to: s.p[:22]) 7760 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7761 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7762 // &(s.p), &(s.p[0]), 22*sizeof(double), 7763 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7764 // (*) alloc space for struct members, only this is a target parameter 7765 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7766 // optimizes this entry out, same in the examples below) 7767 // (***) map the pointee (map: to) 7768 // 7769 // map(to: s.ref) 7770 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7771 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7772 // (*) alloc space for struct members, only this is a target parameter 7773 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7774 // optimizes this entry out, same in the examples below) 7775 // (***) map the pointee (map: to) 7776 // 7777 // map(s.ps) 7778 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7779 // 7780 // map(from: s.ps->s.i) 7781 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7782 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7783 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7784 // 7785 // map(to: s.ps->ps) 7786 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7787 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7788 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7789 // 7790 // map(s.ps->ps->ps) 7791 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7792 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7793 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7794 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7795 // 7796 // map(to: s.ps->ps->s.f[:22]) 7797 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7798 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7799 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7800 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7801 // 7802 // map(ps) 7803 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7804 // 7805 // map(ps->i) 7806 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7807 // 7808 // map(ps->s.f) 7809 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7810 // 7811 // map(from: ps->p) 7812 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7813 // 7814 // 
map(to: ps->p[:22]) 7815 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7816 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7817 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7818 // 7819 // map(ps->ps) 7820 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7821 // 7822 // map(from: ps->ps->s.i) 7823 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7824 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7825 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7826 // 7827 // map(from: ps->ps->ps) 7828 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7829 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7830 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7831 // 7832 // map(ps->ps->ps->ps) 7833 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7834 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7835 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7836 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7837 // 7838 // map(to: ps->ps->ps->s.f[:22]) 7839 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7840 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7841 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7842 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7843 // 7844 // map(to: s.f[:22]) map(from: s.p[:33]) 7845 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7846 // sizeof(double*) (**), TARGET_PARAM 7847 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7848 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7849 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7850 // (*) allocate contiguous space needed to fit all mapped members even if 7851 // we allocate space for members not mapped (in this example, 7852 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7853 // them as well because they fall between &s.f[0] and &s.p) 7854 // 7855 // map(from: s.f[:22]) map(to: ps->p[:33]) 7856 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7857 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7858 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7859 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7860 // (*) the struct this entry pertains to is the 2nd element in the list of 7861 // arguments, hence MEMBER_OF(2) 7862 // 7863 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7864 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7865 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7866 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7867 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7868 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7869 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7870 // (*) the struct this entry pertains to is the 4th element in the list 7871 // of arguments, hence MEMBER_OF(4) 7872 7873 // Track if the map information being generated is the first for a capture. 7874 bool IsCaptureFirstInfo = IsFirstComponentList; 7875 // When the variable is on a declare target link or in a to clause with 7876 // unified memory, a reference is needed to hold the host/device address 7877 // of the variable. 7878 bool RequiresReference = false; 7879 7880 // Scan the components from the base to the complete expression. 7881 auto CI = Components.rbegin(); 7882 auto CE = Components.rend(); 7883 auto I = CI; 7884 7885 // Track if the map information being generated is the first for a list of 7886 // components. 
7887 bool IsExpressionFirstInfo = true; 7888 bool FirstPointerInComplexData = false; 7889 Address BP = Address::invalid(); 7890 const Expr *AssocExpr = I->getAssociatedExpression(); 7891 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7892 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7893 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7894 7895 if (isa<MemberExpr>(AssocExpr)) { 7896 // The base is the 'this' pointer. The content of the pointer is going 7897 // to be the base of the field being mapped. 7898 BP = CGF.LoadCXXThisAddress(); 7899 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7900 (OASE && 7901 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7902 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7903 } else if (OAShE && 7904 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7905 BP = Address::deprecated( 7906 CGF.EmitScalarExpr(OAShE->getBase()), 7907 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7908 } else { 7909 // The base is the reference to the variable. 7910 // BP = &Var. 7911 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7912 if (const auto *VD = 7913 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7914 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7915 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7916 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7917 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7918 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7919 RequiresReference = true; 7920 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7921 } 7922 } 7923 } 7924 7925 // If the variable is a pointer and is being dereferenced (i.e. is not 7926 // the last component), the base has to be the pointer itself, not its 7927 // reference. References are ignored for mapping purposes. 7928 QualType Ty = 7929 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7930 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7931 // No need to generate individual map information for the pointer, it 7932 // can be associated with the combined storage if shared memory mode is 7933 // active or the base declaration is not global variable. 7934 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7935 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7936 !VD || VD->hasLocalStorage()) 7937 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7938 else 7939 FirstPointerInComplexData = true; 7940 ++I; 7941 } 7942 } 7943 7944 // Track whether a component of the list should be marked as MEMBER_OF some 7945 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7946 // in a component list should be marked as MEMBER_OF, all subsequent entries 7947 // do not belong to the base struct. E.g. 7948 // struct S2 s; 7949 // s.ps->ps->ps->f[:] 7950 // (1) (2) (3) (4) 7951 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7952 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7953 // is the pointee of ps(2) which is not member of struct s, so it should not 7954 // be marked as such (it is still PTR_AND_OBJ). 7955 // The variable is initialized to false so that PTR_AND_OBJ entries which 7956 // are not struct members are not considered (e.g. array of pointers to 7957 // data). 
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be
        // marked as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a
      // pointer, it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. We have to
      // treat array sections specially since they are built-in types.
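      // (Illustration: for 'double *p' mapped as 'p[0:n]', the section's base
      // original type is a pointer, so IsPointer below is true even though
      // the section expression itself does not have pointer type.)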
8022 const auto *OASE = 8023 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8024 const auto *OAShE = 8025 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8026 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8027 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8028 bool IsPointer = 8029 OAShE || 8030 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8031 .getCanonicalType() 8032 ->isAnyPointerType()) || 8033 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8034 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8035 MapDecl && 8036 MapDecl->getType()->isLValueReferenceType(); 8037 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8038 8039 if (OASE) 8040 ++DimSize; 8041 8042 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8043 IsFinalArraySection) { 8044 // If this is not the last component, we expect the pointer to be 8045 // associated with an array expression or member expression. 8046 assert((Next == CE || 8047 isa<MemberExpr>(Next->getAssociatedExpression()) || 8048 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8049 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8050 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8051 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8052 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8053 "Unexpected expression"); 8054 8055 Address LB = Address::invalid(); 8056 Address LowestElem = Address::invalid(); 8057 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8058 const MemberExpr *E) { 8059 const Expr *BaseExpr = E->getBase(); 8060 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8061 // scalar. 8062 LValue BaseLV; 8063 if (E->isArrow()) { 8064 LValueBaseInfo BaseInfo; 8065 TBAAAccessInfo TBAAInfo; 8066 Address Addr = 8067 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8068 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8069 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8070 } else { 8071 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8072 } 8073 return BaseLV; 8074 }; 8075 if (OAShE) { 8076 LowestElem = LB = 8077 Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()), 8078 CGF.getContext().getTypeAlignInChars( 8079 OAShE->getBase()->getType())); 8080 } else if (IsMemberReference) { 8081 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8082 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8083 LowestElem = CGF.EmitLValueForFieldInitialization( 8084 BaseLVal, cast<FieldDecl>(MapDecl)) 8085 .getAddress(CGF); 8086 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8087 .getAddress(CGF); 8088 } else { 8089 LowestElem = LB = 8090 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8091 .getAddress(CGF); 8092 } 8093 8094 // If this component is a pointer inside the base struct then we don't 8095 // need to create any entry for it - it will be combined with the object 8096 // it is pointing to into a single PTR_AND_OBJ entry. 8097 bool IsMemberPointerOrAddr = 8098 EncounteredME && 8099 (((IsPointer || ForDeviceAddr) && 8100 I->getAssociatedExpression() == EncounteredME) || 8101 (IsPrevMemberReference && !IsPointer) || 8102 (IsMemberReference && Next != CE && 8103 !Next->getAssociatedExpression()->getType()->isPointerType())); 8104 if (!OverlappedElements.empty() && Next == CE) { 8105 // Handle base element with the info for overlapped elements. 
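        // (Illustration: if 's.p' appears in OverlappedElements, the loop
        // below emits bitcopy entries for the bytes of 's' before 's.p' and
        // after it, so the overlapped member itself can be mapped separately,
        // e.g. as a PTR_AND_OBJ entry.)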
8106 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8107 assert(!IsPointer && 8108 "Unexpected base element with the pointer type."); 8109 // Mark the whole struct as the struct that requires allocation on the 8110 // device. 8111 PartialStruct.LowestElem = {0, LowestElem}; 8112 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8113 I->getAssociatedExpression()->getType()); 8114 Address HB = CGF.Builder.CreateConstGEP( 8115 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8116 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8117 TypeSize.getQuantity() - 1); 8118 PartialStruct.HighestElem = { 8119 std::numeric_limits<decltype( 8120 PartialStruct.HighestElem.first)>::max(), 8121 HB}; 8122 PartialStruct.Base = BP; 8123 PartialStruct.LB = LB; 8124 assert( 8125 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8126 "Overlapped elements must be used only once for the variable."); 8127 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8128 // Emit data for non-overlapped data. 8129 OpenMPOffloadMappingFlags Flags = 8130 OMP_MAP_MEMBER_OF | 8131 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8132 /*AddPtrFlag=*/false, 8133 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8134 llvm::Value *Size = nullptr; 8135 // Do bitcopy of all non-overlapped structure elements. 8136 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8137 Component : OverlappedElements) { 8138 Address ComponentLB = Address::invalid(); 8139 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8140 Component) { 8141 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8142 const auto *FD = dyn_cast<FieldDecl>(VD); 8143 if (FD && FD->getType()->isLValueReferenceType()) { 8144 const auto *ME = 8145 cast<MemberExpr>(MC.getAssociatedExpression()); 8146 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8147 ComponentLB = 8148 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8149 .getAddress(CGF); 8150 } else { 8151 ComponentLB = 8152 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8153 .getAddress(CGF); 8154 } 8155 Size = CGF.Builder.CreatePtrDiff( 8156 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8157 CGF.EmitCastToVoidPtr(LB.getPointer())); 8158 break; 8159 } 8160 } 8161 assert(Size && "Failed to determine structure size"); 8162 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8163 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8164 CombinedInfo.Pointers.push_back(LB.getPointer()); 8165 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8166 Size, CGF.Int64Ty, /*isSigned=*/true)); 8167 CombinedInfo.Types.push_back(Flags); 8168 CombinedInfo.Mappers.push_back(nullptr); 8169 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8170 : 1); 8171 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8172 } 8173 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8174 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8175 CombinedInfo.Pointers.push_back(LB.getPointer()); 8176 Size = CGF.Builder.CreatePtrDiff( 8177 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8178 CGF.EmitCastToVoidPtr(LB.getPointer())); 8179 CombinedInfo.Sizes.push_back( 8180 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8181 CombinedInfo.Types.push_back(Flags); 8182 CombinedInfo.Mappers.push_back(nullptr); 8183 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8184 : 1); 8185 break; 8186 } 8187 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8188 if (!IsMemberPointerOrAddr || 8189 (Next == CE && MapType != OMPC_MAP_unknown)) { 8190 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8191 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8192 CombinedInfo.Pointers.push_back(LB.getPointer()); 8193 CombinedInfo.Sizes.push_back( 8194 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8195 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8196 : 1); 8197 8198 // If Mapper is valid, the last component inherits the mapper. 8199 bool HasMapper = Mapper && Next == CE; 8200 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8201 8202 // We need to add a pointer flag for each map that comes from the 8203 // same expression except for the first one. We also need to signal 8204 // this map is the first one that relates with the current capture 8205 // (there is a set of entries for each capture). 8206 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8207 MapType, MapModifiers, MotionModifiers, IsImplicit, 8208 !IsExpressionFirstInfo || RequiresReference || 8209 FirstPointerInComplexData || IsMemberReference, 8210 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8211 8212 if (!IsExpressionFirstInfo || IsMemberReference) { 8213 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8214 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8215 if (IsPointer || (IsMemberReference && Next != CE)) 8216 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8217 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8218 8219 if (ShouldBeMemberOf) { 8220 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8221 // should be later updated with the correct value of MEMBER_OF. 8222 Flags |= OMP_MAP_MEMBER_OF; 8223 // From now on, all subsequent PTR_AND_OBJ entries should not be 8224 // marked as MEMBER_OF. 8225 ShouldBeMemberOf = false; 8226 } 8227 } 8228 8229 CombinedInfo.Types.push_back(Flags); 8230 } 8231 8232 // If we have encountered a member expression so far, keep track of the 8233 // mapped member. If the parent is "*this", then the value declaration 8234 // is nullptr. 8235 if (EncounteredME) { 8236 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8237 unsigned FieldIndex = FD->getFieldIndex(); 8238 8239 // Update info about the lowest and highest elements for this struct 8240 if (!PartialStruct.Base.isValid()) { 8241 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8242 if (IsFinalArraySection) { 8243 Address HB = 8244 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8245 .getAddress(CGF); 8246 PartialStruct.HighestElem = {FieldIndex, HB}; 8247 } else { 8248 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8249 } 8250 PartialStruct.Base = BP; 8251 PartialStruct.LB = BP; 8252 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8253 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8254 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8255 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8256 } 8257 } 8258 8259 // Need to emit combined struct for array sections. 8260 if (IsFinalArraySection || IsNonContiguous) 8261 PartialStruct.IsArraySection = true; 8262 8263 // If we have a final array section, we are done with this expression. 8264 if (IsFinalArraySection) 8265 break; 8266 8267 // The pointer becomes the base for the next element. 8268 if (Next != CE) 8269 BP = IsMemberReference ? 
LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list, allocate space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we need to initialize the first
    // dimension size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect the size information for each dimension and get the element
    // size as the first stride. For example, for `int arr[10][10]`, the
    // DimSizes should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // When the base is a pointer, we need to remove one level of
          // indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension, since we
      // don't need it.
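      // (Worked instance of the comment above: for 'int arr[10][10]' mapped
      // as 'arr[0:2][0:2]', the element stride starts at sizeof(int) == 4
      // bytes, and each non-final dimension contributes its extent of 10 on
      // top of the dummy leading dimension of size 1.)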
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section. Also, the sizes of the offset,
    // count, and stride arrays are not the same as those of pointers,
    // base_pointers, sizes, or dims. Instead, they match the number of
    // non-contiguous declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ...
* D_n-1) * Unit size 8440 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8441 // Offset Count Stride 8442 // D0 0 1 4 (int) <- dummy dimension 8443 // D1 0 2 8 (2 * (1) * 4) 8444 // D2 1 2 20 (1 * (1 * 5) * 4) 8445 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8446 const Expr *StrideExpr = OASE->getStride(); 8447 llvm::Value *Stride = 8448 StrideExpr 8449 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8450 CGF.Int64Ty, /*isSigned=*/false) 8451 : nullptr; 8452 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8453 if (Stride) 8454 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8455 else 8456 CurStrides.push_back(DimProd); 8457 if (DI != DimSizes.end()) 8458 ++DI; 8459 } 8460 8461 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8462 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8463 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8464 } 8465 8466 /// Return the adjusted map modifiers if the declaration a capture refers to 8467 /// appears in a first-private clause. This is expected to be used only with 8468 /// directives that start with 'target'. 8469 MappableExprsHandler::OpenMPOffloadMappingFlags 8470 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8471 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8472 8473 // A first private variable captured by reference will use only the 8474 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8475 // declaration is known as first-private in this handler. 8476 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8477 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8478 return MappableExprsHandler::OMP_MAP_TO | 8479 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8480 return MappableExprsHandler::OMP_MAP_PRIVATE | 8481 MappableExprsHandler::OMP_MAP_TO; 8482 } 8483 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8484 if (I != LambdasMap.end()) 8485 // for map(to: lambda): using user specified map type. 8486 return getMapTypeBits( 8487 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8488 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8489 /*AddPtrFlag=*/false, 8490 /*AddIsTargetParamFlag=*/false, 8491 /*isNonContiguous=*/false); 8492 return MappableExprsHandler::OMP_MAP_TO | 8493 MappableExprsHandler::OMP_MAP_FROM; 8494 } 8495 8496 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8497 // Rotate by getFlagMemberOffset() bits. 8498 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8499 << getFlagMemberOffset()); 8500 } 8501 8502 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8503 OpenMPOffloadMappingFlags MemberOfFlag) { 8504 // If the entry is PTR_AND_OBJ but has not been marked with the special 8505 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8506 // marked as MEMBER_OF. 8507 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8508 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8509 return; 8510 8511 // Reset the placeholder value to prepare the flag for the assignment of the 8512 // proper MEMBER_OF value. 8513 Flags &= ~OMP_MAP_MEMBER_OF; 8514 Flags |= MemberOfFlag; 8515 } 8516 8517 void getPlainLayout(const CXXRecordDecl *RD, 8518 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8519 bool AsBase) const { 8520 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8521 8522 llvm::StructType *St = 8523 AsBase ? 
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8524 8525 unsigned NumElements = St->getNumElements(); 8526 llvm::SmallVector< 8527 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8528 RecordLayout(NumElements); 8529 8530 // Fill bases. 8531 for (const auto &I : RD->bases()) { 8532 if (I.isVirtual()) 8533 continue; 8534 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8535 // Ignore empty bases. 8536 if (Base->isEmpty() || CGF.getContext() 8537 .getASTRecordLayout(Base) 8538 .getNonVirtualSize() 8539 .isZero()) 8540 continue; 8541 8542 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8543 RecordLayout[FieldIndex] = Base; 8544 } 8545 // Fill in virtual bases. 8546 for (const auto &I : RD->vbases()) { 8547 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8548 // Ignore empty bases. 8549 if (Base->isEmpty()) 8550 continue; 8551 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8552 if (RecordLayout[FieldIndex]) 8553 continue; 8554 RecordLayout[FieldIndex] = Base; 8555 } 8556 // Fill in all the fields. 8557 assert(!RD->isUnion() && "Unexpected union."); 8558 for (const auto *Field : RD->fields()) { 8559 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8560 // will fill in later.) 8561 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8562 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8563 RecordLayout[FieldIndex] = Field; 8564 } 8565 } 8566 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8567 &Data : RecordLayout) { 8568 if (Data.isNull()) 8569 continue; 8570 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8571 getPlainLayout(Base, Layout, /*AsBase=*/true); 8572 else 8573 Layout.push_back(Data.get<const FieldDecl *>()); 8574 } 8575 } 8576 8577 /// Generate all the base pointers, section pointers, sizes, map types, and 8578 /// mappers for the extracted mappable expressions (all included in \a 8579 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8580 /// pair of the relevant declaration and index where it occurs is appended to 8581 /// the device pointers info array. 8582 void generateAllInfoForClauses( 8583 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8584 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8585 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8586 // We have to process the component lists that relate with the same 8587 // declaration in a single chunk so that we can generate the map flags 8588 // correctly. Therefore, we organize all lists in a map. 8589 enum MapKind { Present, Allocs, Other, Total }; 8590 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8591 SmallVector<SmallVector<MapInfo, 8>, 4>> 8592 Info; 8593 8594 // Helper function to fill the information map for the different supported 8595 // clauses. 
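    // Note: per declaration we keep one bucket per MapKind (Present, Allocs,
    // Other); iterating the buckets in enum order later means entries from
    // 'present'-modified clauses are processed first, then 'alloc', then the
    // rest.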
8596 auto &&InfoGen = 8597 [&Info, &SkipVarSet]( 8598 const ValueDecl *D, MapKind Kind, 8599 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8600 OpenMPMapClauseKind MapType, 8601 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8602 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8603 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8604 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8605 if (SkipVarSet.contains(D)) 8606 return; 8607 auto It = Info.find(D); 8608 if (It == Info.end()) 8609 It = Info 8610 .insert(std::make_pair( 8611 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8612 .first; 8613 It->second[Kind].emplace_back( 8614 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8615 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8616 }; 8617 8618 for (const auto *Cl : Clauses) { 8619 const auto *C = dyn_cast<OMPMapClause>(Cl); 8620 if (!C) 8621 continue; 8622 MapKind Kind = Other; 8623 if (llvm::is_contained(C->getMapTypeModifiers(), 8624 OMPC_MAP_MODIFIER_present)) 8625 Kind = Present; 8626 else if (C->getMapType() == OMPC_MAP_alloc) 8627 Kind = Allocs; 8628 const auto *EI = C->getVarRefs().begin(); 8629 for (const auto L : C->component_lists()) { 8630 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 8631 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8632 C->getMapTypeModifiers(), llvm::None, 8633 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8634 E); 8635 ++EI; 8636 } 8637 } 8638 for (const auto *Cl : Clauses) { 8639 const auto *C = dyn_cast<OMPToClause>(Cl); 8640 if (!C) 8641 continue; 8642 MapKind Kind = Other; 8643 if (llvm::is_contained(C->getMotionModifiers(), 8644 OMPC_MOTION_MODIFIER_present)) 8645 Kind = Present; 8646 const auto *EI = C->getVarRefs().begin(); 8647 for (const auto L : C->component_lists()) { 8648 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8649 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8650 C->isImplicit(), std::get<2>(L), *EI); 8651 ++EI; 8652 } 8653 } 8654 for (const auto *Cl : Clauses) { 8655 const auto *C = dyn_cast<OMPFromClause>(Cl); 8656 if (!C) 8657 continue; 8658 MapKind Kind = Other; 8659 if (llvm::is_contained(C->getMotionModifiers(), 8660 OMPC_MOTION_MODIFIER_present)) 8661 Kind = Present; 8662 const auto *EI = C->getVarRefs().begin(); 8663 for (const auto L : C->component_lists()) { 8664 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8665 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8666 C->isImplicit(), std::get<2>(L), *EI); 8667 ++EI; 8668 } 8669 } 8670 8671 // Look at the use_device_ptr clause information and mark the existing map 8672 // entries as such. If there is no map information for an entry in the 8673 // use_device_ptr list, we create one with map type 'alloc' and zero size 8674 // section. It is the user fault if that was not mapped before. If there is 8675 // no map information and the pointer is a struct member, then we defer the 8676 // emission of that entry until the whole struct has been processed. 
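    // For example (an illustrative sketch, not tied to any particular test):
    //   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
    //   { ... }
    // marks the existing map entry for 'p' as a 'return pointer' entry so
    // that, inside the region, 'p' can be replaced by the corresponding
    // device pointer.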
8677 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8678 SmallVector<DeferredDevicePtrEntryTy, 4>> 8679 DeferredInfo; 8680 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8681 8682 for (const auto *Cl : Clauses) { 8683 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8684 if (!C) 8685 continue; 8686 for (const auto L : C->component_lists()) { 8687 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8688 std::get<1>(L); 8689 assert(!Components.empty() && 8690 "Not expecting empty list of components!"); 8691 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8692 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8693 const Expr *IE = Components.back().getAssociatedExpression(); 8694 // If the first component is a member expression, we have to look into 8695 // 'this', which maps to null in the map of map information. Otherwise 8696 // look directly for the information. 8697 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8698 8699 // We potentially have map information for this declaration already. 8700 // Look for the first set of components that refer to it. 8701 if (It != Info.end()) { 8702 bool Found = false; 8703 for (auto &Data : It->second) { 8704 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8705 return MI.Components.back().getAssociatedDeclaration() == VD; 8706 }); 8707 // If we found a map entry, signal that the pointer has to be 8708 // returned and move on to the next declaration. Exclude cases where 8709 // the base pointer is mapped as array subscript, array section or 8710 // array shaping. The base address is passed as a pointer to base in 8711 // this case and cannot be used as a base for use_device_ptr list 8712 // item. 8713 if (CI != Data.end()) { 8714 auto PrevCI = std::next(CI->Components.rbegin()); 8715 const auto *VarD = dyn_cast<VarDecl>(VD); 8716 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8717 isa<MemberExpr>(IE) || 8718 !VD->getType().getNonReferenceType()->isPointerType() || 8719 PrevCI == CI->Components.rend() || 8720 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8721 VarD->hasLocalStorage()) { 8722 CI->ReturnDevicePointer = true; 8723 Found = true; 8724 break; 8725 } 8726 } 8727 } 8728 if (Found) 8729 continue; 8730 } 8731 8732 // We didn't find any match in our map information - generate a zero 8733 // size array section - if the pointer is a struct member we defer this 8734 // action until the whole struct has been processed. 8735 if (isa<MemberExpr>(IE)) { 8736 // Insert the pointer into Info to be processed by 8737 // generateInfoForComponentList. Because it is a member pointer 8738 // without a pointee, no entry will be generated for it, therefore 8739 // we need to generate one after the whole struct has been processed. 8740 // Nonetheless, generateInfoForComponentList must be called to take 8741 // the pointer into account for the calculation of the range of the 8742 // partial struct. 
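          // For instance (illustrative): use_device_ptr(p) where 'p' is a
          // field of 'this' makes 'p' a member pointer, so its RETURN_PARAM
          // entry can only be emitted once the enclosing struct's combined
          // entry, and thus its MEMBER_OF index, is known.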
8743 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8744 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8745 nullptr); 8746 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8747 } else { 8748 llvm::Value *Ptr = 8749 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8750 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8751 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8752 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8753 UseDevicePtrCombinedInfo.Sizes.push_back( 8754 llvm::Constant::getNullValue(CGF.Int64Ty)); 8755 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8756 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8757 } 8758 } 8759 } 8760 8761 // Look at the use_device_addr clause information and mark the existing map 8762 // entries as such. If there is no map information for an entry in the 8763 // use_device_addr list, we create one with map type 'alloc' and zero size 8764 // section. It is the user fault if that was not mapped before. If there is 8765 // no map information and the pointer is a struct member, then we defer the 8766 // emission of that entry until the whole struct has been processed. 8767 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8768 for (const auto *Cl : Clauses) { 8769 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8770 if (!C) 8771 continue; 8772 for (const auto L : C->component_lists()) { 8773 assert(!std::get<1>(L).empty() && 8774 "Not expecting empty list of components!"); 8775 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8776 if (!Processed.insert(VD).second) 8777 continue; 8778 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8779 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8780 // If the first component is a member expression, we have to look into 8781 // 'this', which maps to null in the map of map information. Otherwise 8782 // look directly for the information. 8783 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8784 8785 // We potentially have map information for this declaration already. 8786 // Look for the first set of components that refer to it. 8787 if (It != Info.end()) { 8788 bool Found = false; 8789 for (auto &Data : It->second) { 8790 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8791 return MI.Components.back().getAssociatedDeclaration() == VD; 8792 }); 8793 // If we found a map entry, signal that the pointer has to be 8794 // returned and move on to the next declaration. 8795 if (CI != Data.end()) { 8796 CI->ReturnDevicePointer = true; 8797 Found = true; 8798 break; 8799 } 8800 } 8801 if (Found) 8802 continue; 8803 } 8804 8805 // We didn't find any match in our map information - generate a zero 8806 // size array section - if the pointer is a struct member we defer this 8807 // action until the whole struct has been processed. 8808 if (isa<MemberExpr>(IE)) { 8809 // Insert the pointer into Info to be processed by 8810 // generateInfoForComponentList. Because it is a member pointer 8811 // without a pointee, no entry will be generated for it, therefore 8812 // we need to generate one after the whole struct has been processed. 8813 // Nonetheless, generateInfoForComponentList must be called to take 8814 // the pointer into account for the calculation of the range of the 8815 // partial struct. 
8816 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8817 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8818 nullptr, nullptr, /*ForDeviceAddr=*/true); 8819 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8820 } else { 8821 llvm::Value *Ptr; 8822 if (IE->isGLValue()) 8823 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8824 else 8825 Ptr = CGF.EmitScalarExpr(IE); 8826 CombinedInfo.Exprs.push_back(VD); 8827 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8828 CombinedInfo.Pointers.push_back(Ptr); 8829 CombinedInfo.Sizes.push_back( 8830 llvm::Constant::getNullValue(CGF.Int64Ty)); 8831 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8832 CombinedInfo.Mappers.push_back(nullptr); 8833 } 8834 } 8835 } 8836 8837 for (const auto &Data : Info) { 8838 StructRangeInfoTy PartialStruct; 8839 // Temporary generated information. 8840 MapCombinedInfoTy CurInfo; 8841 const Decl *D = Data.first; 8842 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8843 for (const auto &M : Data.second) { 8844 for (const MapInfo &L : M) { 8845 assert(!L.Components.empty() && 8846 "Not expecting declaration with no component lists."); 8847 8848 // Remember the current base pointer index. 8849 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8850 CurInfo.NonContigInfo.IsNonContiguous = 8851 L.Components.back().isNonContiguous(); 8852 generateInfoForComponentList( 8853 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8854 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8855 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8856 8857 // If this entry relates with a device pointer, set the relevant 8858 // declaration and add the 'return pointer' flag. 8859 if (L.ReturnDevicePointer) { 8860 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8861 "Unexpected number of mapped base pointers."); 8862 8863 const ValueDecl *RelevantVD = 8864 L.Components.back().getAssociatedDeclaration(); 8865 assert(RelevantVD && 8866 "No relevant declaration related with device pointer??"); 8867 8868 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8869 RelevantVD); 8870 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8871 } 8872 } 8873 } 8874 8875 // Append any pending zero-length pointers which are struct members and 8876 // used with use_device_ptr or use_device_addr. 8877 auto CI = DeferredInfo.find(Data.first); 8878 if (CI != DeferredInfo.end()) { 8879 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8880 llvm::Value *BasePtr; 8881 llvm::Value *Ptr; 8882 if (L.ForDeviceAddr) { 8883 if (L.IE->isGLValue()) 8884 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8885 else 8886 Ptr = this->CGF.EmitScalarExpr(L.IE); 8887 BasePtr = Ptr; 8888 // Entry is RETURN_PARAM. Also, set the placeholder value 8889 // MEMBER_OF=FFFF so that the entry is later updated with the 8890 // correct value of MEMBER_OF. 8891 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8892 } else { 8893 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8894 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8895 L.IE->getExprLoc()); 8896 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8897 // placeholder value MEMBER_OF=FFFF so that the entry is later 8898 // updated with the correct value of MEMBER_OF. 
8899 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8900 OMP_MAP_MEMBER_OF); 8901 } 8902 CurInfo.Exprs.push_back(L.VD); 8903 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8904 CurInfo.Pointers.push_back(Ptr); 8905 CurInfo.Sizes.push_back( 8906 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8907 CurInfo.Mappers.push_back(nullptr); 8908 } 8909 } 8910 // If there is an entry in PartialStruct it means we have a struct with 8911 // individual members mapped. Emit an extra combined entry. 8912 if (PartialStruct.Base.isValid()) { 8913 CurInfo.NonContigInfo.Dims.push_back(0); 8914 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8915 } 8916 8917 // We need to append the results of this capture to what we already 8918 // have. 8919 CombinedInfo.append(CurInfo); 8920 } 8921 // Append data for use_device_ptr clauses. 8922 CombinedInfo.append(UseDevicePtrCombinedInfo); 8923 } 8924 8925 public: 8926 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8927 : CurDir(&Dir), CGF(CGF) { 8928 // Extract firstprivate clause information. 8929 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8930 for (const auto *D : C->varlists()) 8931 FirstPrivateDecls.try_emplace( 8932 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8933 // Extract implicit firstprivates from uses_allocators clauses. 8934 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8935 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8936 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8937 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8938 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8939 /*Implicit=*/true); 8940 else if (const auto *VD = dyn_cast<VarDecl>( 8941 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8942 ->getDecl())) 8943 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8944 } 8945 } 8946 // Extract device pointer clause information. 8947 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8948 for (auto L : C->component_lists()) 8949 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8950 // Extract map information. 8951 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8952 if (C->getMapType() != OMPC_MAP_to) 8953 continue; 8954 for (auto L : C->component_lists()) { 8955 const ValueDecl *VD = std::get<0>(L); 8956 const auto *RD = VD ? VD->getType() 8957 .getCanonicalType() 8958 .getNonReferenceType() 8959 ->getAsCXXRecordDecl() 8960 : nullptr; 8961 if (RD && RD->isLambda()) 8962 LambdasMap.try_emplace(std::get<0>(L), C); 8963 } 8964 } 8965 } 8966 8967 /// Constructor for the declare mapper directive. 8968 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8969 : CurDir(&Dir), CGF(CGF) {} 8970 8971 /// Generate code for the combined entry if we have a partially mapped struct 8972 /// and take care of the mapping flags of the arguments corresponding to 8973 /// individual struct members. 
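  /// For example (an illustrative sketch):
  /// \code
  ///   struct S { int X; double Y; } s;
  ///   #pragma omp target map(tofrom: s.X, s.Y)
  /// \endcode
  /// yields one combined entry whose pointer is the address of s.X (the
  /// lowest mapped element) and whose size extends to one past s.Y (the
  /// highest one); the individual member entries are then marked MEMBER_OF
  /// this combined entry.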
8974   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8975                          MapFlagsArrayTy &CurTypes,
8976                          const StructRangeInfoTy &PartialStruct,
8977                          const ValueDecl *VD = nullptr,
8978                          bool NotTargetParams = true) const {
8979     if (CurTypes.size() == 1 &&
8980         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8981         !PartialStruct.IsArraySection)
8982       return;
8983     Address LBAddr = PartialStruct.LowestElem.second;
8984     Address HBAddr = PartialStruct.HighestElem.second;
8985     if (PartialStruct.HasCompleteRecord) {
8986       LBAddr = PartialStruct.LB;
8987       HBAddr = PartialStruct.LB;
8988     }
8989     CombinedInfo.Exprs.push_back(VD);
8990     // Base is the base of the struct.
8991     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8992     // Pointer is the address of the lowest element.
8993     llvm::Value *LB = LBAddr.getPointer();
8994     CombinedInfo.Pointers.push_back(LB);
8995     // There should not be a mapper for a combined entry.
8996     CombinedInfo.Mappers.push_back(nullptr);
8997     // Size is (addr of {highest+1} element) - (addr of lowest element).
8998     llvm::Value *HB = HBAddr.getPointer();
8999     llvm::Value *HAddr =
9000         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
9001     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9002     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9003     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9004     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9005                                                   /*isSigned=*/false);
9006     CombinedInfo.Sizes.push_back(Size);
9007     // The map type is TARGET_PARAM only when we generate info for captures.
9008     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9009                                                  : OMP_MAP_TARGET_PARAM);
9010     // If any element has the present modifier, then make sure the runtime
9011     // doesn't attempt to allocate the struct.
9012     if (CurTypes.end() !=
9013         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9014           return Type & OMP_MAP_PRESENT;
9015         }))
9016       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9017     // Remove the TARGET_PARAM flag from the first element.
9018     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9019     // If any element has the ompx_hold modifier, then make sure the runtime
9020     // uses the hold reference count for the struct as a whole so that it won't
9021     // be unmapped by an extra dynamic reference count decrement. Add it to all
9022     // elements as well so the runtime knows which reference count to check
9023     // when determining whether it's time for device-to-host transfers of
9024     // individual elements.
9025     if (CurTypes.end() !=
9026         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9027           return Type & OMP_MAP_OMPX_HOLD;
9028         })) {
9029       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9030       for (auto &M : CurTypes)
9031         M |= OMP_MAP_OMPX_HOLD;
9032     }
9033 
9034     // All other current entries will be MEMBER_OF the combined entry
9035     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9036     // 0xFFFF in the MEMBER_OF field).
9037     OpenMPOffloadMappingFlags MemberOfFlag =
9038         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9039     for (auto &M : CurTypes)
9040       setCorrectMemberOfFlag(M, MemberOfFlag);
9041   }
9042 
9043   /// Generate all the base pointers, section pointers, sizes, map types, and
9044   /// mappers for the extracted mappable expressions (all included in \a
9045   /// CombinedInfo).
Also, for each item that relates with a device pointer, a 9046 /// pair of the relevant declaration and index where it occurs is appended to 9047 /// the device pointers info array. 9048 void generateAllInfo( 9049 MapCombinedInfoTy &CombinedInfo, 9050 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9051 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9052 assert(CurDir.is<const OMPExecutableDirective *>() && 9053 "Expect a executable directive"); 9054 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9055 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9056 } 9057 9058 /// Generate all the base pointers, section pointers, sizes, map types, and 9059 /// mappers for the extracted map clauses of user-defined mapper (all included 9060 /// in \a CombinedInfo). 9061 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9062 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9063 "Expect a declare mapper directive"); 9064 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9065 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9066 } 9067 9068 /// Emit capture info for lambdas for variables captured by reference. 9069 void generateInfoForLambdaCaptures( 9070 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 9071 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 9072 const auto *RD = VD->getType() 9073 .getCanonicalType() 9074 .getNonReferenceType() 9075 ->getAsCXXRecordDecl(); 9076 if (!RD || !RD->isLambda()) 9077 return; 9078 Address VDAddr = 9079 Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD)); 9080 LValue VDLVal = CGF.MakeAddrLValue( 9081 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 9082 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 9083 FieldDecl *ThisCapture = nullptr; 9084 RD->getCaptureFields(Captures, ThisCapture); 9085 if (ThisCapture) { 9086 LValue ThisLVal = 9087 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 9088 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 9089 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 9090 VDLVal.getPointer(CGF)); 9091 CombinedInfo.Exprs.push_back(VD); 9092 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 9093 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 9094 CombinedInfo.Sizes.push_back( 9095 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 9096 CGF.Int64Ty, /*isSigned=*/true)); 9097 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9098 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9099 CombinedInfo.Mappers.push_back(nullptr); 9100 } 9101 for (const LambdaCapture &LC : RD->captures()) { 9102 if (!LC.capturesVariable()) 9103 continue; 9104 const VarDecl *VD = LC.getCapturedVar(); 9105 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 9106 continue; 9107 auto It = Captures.find(VD); 9108 assert(It != Captures.end() && "Found lambda capture without field."); 9109 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 9110 if (LC.getCaptureKind() == LCK_ByRef) { 9111 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 9112 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9113 VDLVal.getPointer(CGF)); 9114 CombinedInfo.Exprs.push_back(VD); 9115 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 9116 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 9117 
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9118             CGF.getTypeSize(
9119                 VD->getType().getCanonicalType().getNonReferenceType()),
9120             CGF.Int64Ty, /*isSigned=*/true));
9121       } else {
9122         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9123         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9124                                    VDLVal.getPointer(CGF));
9125         CombinedInfo.Exprs.push_back(VD);
9126         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9127         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9128         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9129       }
9130       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9131                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9132       CombinedInfo.Mappers.push_back(nullptr);
9133     }
9134   }
9135 
9136   /// Set correct indices for lambda captures.
9137   void adjustMemberOfForLambdaCaptures(
9138       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9139       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9140       MapFlagsArrayTy &Types) const {
9141     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9142       // Set correct member_of idx for all implicit lambda captures.
9143       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9144                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9145         continue;
9146       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9147       assert(BasePtr && "Unable to find base lambda address.");
9148       int TgtIdx = -1;
9149       for (unsigned J = I; J > 0; --J) {
9150         unsigned Idx = J - 1;
9151         if (Pointers[Idx] != BasePtr)
9152           continue;
9153         TgtIdx = Idx;
9154         break;
9155       }
9156       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9157       // All other current entries will be MEMBER_OF the combined entry
9158       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9159       // 0xFFFF in the MEMBER_OF field).
9160       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9161       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9162     }
9163   }
9164 
9165   /// Generate the base pointers, section pointers, sizes, map types, and
9166   /// mappers associated to a given capture (all included in \a CombinedInfo).
9167   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9168                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9169                               StructRangeInfoTy &PartialStruct) const {
9170     assert(!Cap->capturesVariableArrayType() &&
9171            "Not expecting to generate map info for a variable array type!");
9172 
9173     // We need to know when we are generating information for the first component.
9174     const ValueDecl *VD = Cap->capturesThis()
9175                               ? nullptr
9176                               : Cap->getCapturedVar()->getCanonicalDecl();
9177 
9178     // For map(to: lambda): skip here; it is processed in
9179     // generateDefaultMapInfo.
9180     if (LambdasMap.count(VD))
9181       return;
9182 
9183     // If this declaration appears in an is_device_ptr clause we just have to
9184     // pass the pointer by value. If it is a reference to a declaration, we just
9185     // pass its value.
9186     if (DevPointersMap.count(VD)) {
9187       CombinedInfo.Exprs.push_back(VD);
9188       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9189       CombinedInfo.Pointers.push_back(Arg);
9190       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9191           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9192           /*isSigned=*/true));
9193       CombinedInfo.Types.push_back(
9194           (Cap->capturesVariable() ?
OMP_MAP_TO : OMP_MAP_LITERAL) | 9195 OMP_MAP_TARGET_PARAM); 9196 CombinedInfo.Mappers.push_back(nullptr); 9197 return; 9198 } 9199 9200 using MapData = 9201 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 9202 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 9203 const ValueDecl *, const Expr *>; 9204 SmallVector<MapData, 4> DeclComponentLists; 9205 assert(CurDir.is<const OMPExecutableDirective *>() && 9206 "Expect a executable directive"); 9207 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9208 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 9209 const auto *EI = C->getVarRefs().begin(); 9210 for (const auto L : C->decl_component_lists(VD)) { 9211 const ValueDecl *VDecl, *Mapper; 9212 // The Expression is not correct if the mapping is implicit 9213 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 9214 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9215 std::tie(VDecl, Components, Mapper) = L; 9216 assert(VDecl == VD && "We got information for the wrong declaration??"); 9217 assert(!Components.empty() && 9218 "Not expecting declaration with no component lists."); 9219 DeclComponentLists.emplace_back(Components, C->getMapType(), 9220 C->getMapTypeModifiers(), 9221 C->isImplicit(), Mapper, E); 9222 ++EI; 9223 } 9224 } 9225 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 9226 const MapData &RHS) { 9227 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 9228 OpenMPMapClauseKind MapType = std::get<1>(RHS); 9229 bool HasPresent = 9230 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9231 bool HasAllocs = MapType == OMPC_MAP_alloc; 9232 MapModifiers = std::get<2>(RHS); 9233 MapType = std::get<1>(LHS); 9234 bool HasPresentR = 9235 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9236 bool HasAllocsR = MapType == OMPC_MAP_alloc; 9237 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 9238 }); 9239 9240 // Find overlapping elements (including the offset from the base element). 9241 llvm::SmallDenseMap< 9242 const MapData *, 9243 llvm::SmallVector< 9244 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 9245 4> 9246 OverlappedData; 9247 size_t Count = 0; 9248 for (const MapData &L : DeclComponentLists) { 9249 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9250 OpenMPMapClauseKind MapType; 9251 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9252 bool IsImplicit; 9253 const ValueDecl *Mapper; 9254 const Expr *VarRef; 9255 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9256 L; 9257 ++Count; 9258 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 9259 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 9260 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 9261 VarRef) = L1; 9262 auto CI = Components.rbegin(); 9263 auto CE = Components.rend(); 9264 auto SI = Components1.rbegin(); 9265 auto SE = Components1.rend(); 9266 for (; CI != CE && SI != SE; ++CI, ++SI) { 9267 if (CI->getAssociatedExpression()->getStmtClass() != 9268 SI->getAssociatedExpression()->getStmtClass()) 9269 break; 9270 // Are we dealing with different variables/fields? 9271 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 9272 break; 9273 } 9274 // Found overlapping if, at least for one component, reached the head 9275 // of the components list. 
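        // For example (illustrative): given map(tofrom: s) and map(to: s.x),
        // walking both component lists back from the base exhausts the list
        // of 's' first, so 's' becomes the base data and the component list
        // of 's.x' is recorded as one of its overlapped sub-lists.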
9276         if (CI == CE || SI == SE) {
9277           // Ignore it if it is the same component.
9278           if (CI == CE && SI == SE)
9279             continue;
9280           const auto It = (SI == SE) ? CI : SI;
9281           // If one component is a pointer and another one is a kind of
9282           // dereference of this pointer (array subscript, section, dereference,
9283           // etc.), it is not an overlap.
9284           // Likewise, if one component is a base and another component is a
9285           // dereferenced pointer MemberExpr with the same base.
9286           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9287               (std::prev(It)->getAssociatedDeclaration() &&
9288                std::prev(It)
9289                    ->getAssociatedDeclaration()
9290                    ->getType()
9291                    ->isPointerType()) ||
9292               (It->getAssociatedDeclaration() &&
9293                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9294                std::next(It) != CE && std::next(It) != SE))
9295             continue;
9296           const MapData &BaseData = CI == CE ? L : L1;
9297           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9298               SI == SE ? Components : Components1;
9299           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9300           OverlappedElements.getSecond().push_back(SubData);
9301         }
9302       }
9303     }
9304     // Sort the overlapped elements for each item.
9305     llvm::SmallVector<const FieldDecl *, 4> Layout;
9306     if (!OverlappedData.empty()) {
9307       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9308       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9309       while (BaseType != OrigType) {
9310         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9311         OrigType = BaseType->getPointeeOrArrayElementType();
9312       }
9313 
9314       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9315         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9316       else {
9317         const auto *RD = BaseType->getAsRecordDecl();
9318         Layout.append(RD->field_begin(), RD->field_end());
9319       }
9320     }
9321     for (auto &Pair : OverlappedData) {
9322       llvm::stable_sort(
9323           Pair.getSecond(),
9324           [&Layout](
9325               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9326               OMPClauseMappableExprCommon::MappableExprComponentListRef
9327                   Second) {
9328             auto CI = First.rbegin();
9329             auto CE = First.rend();
9330             auto SI = Second.rbegin();
9331             auto SE = Second.rend();
9332             for (; CI != CE && SI != SE; ++CI, ++SI) {
9333               if (CI->getAssociatedExpression()->getStmtClass() !=
9334                   SI->getAssociatedExpression()->getStmtClass())
9335                 break;
9336               // Are we dealing with different variables/fields?
9337               if (CI->getAssociatedDeclaration() !=
9338                   SI->getAssociatedDeclaration())
9339                 break;
9340             }
9341 
9342             // Lists contain the same elements.
9343             if (CI == CE && SI == SE)
9344               return false;
9345 
9346             // A list with fewer elements is less than a list with more elements.
9347             if (CI == CE || SI == SE)
9348               return CI == CE;
9349 
9350             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9351             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9352             if (FD1->getParent() == FD2->getParent())
9353               return FD1->getFieldIndex() < FD2->getFieldIndex();
9354             const auto *It =
9355                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9356                   return FD == FD1 || FD == FD2;
9357                 });
9358             return *It == FD1;
9359           });
9360     }
9361 
9362     // Associated with a capture, because the mapping flags depend on it.
9363     // First, go through all of the elements that have overlapped elements.
9364 bool IsFirstComponentList = true; 9365 for (const auto &Pair : OverlappedData) { 9366 const MapData &L = *Pair.getFirst(); 9367 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9368 OpenMPMapClauseKind MapType; 9369 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9370 bool IsImplicit; 9371 const ValueDecl *Mapper; 9372 const Expr *VarRef; 9373 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9374 L; 9375 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9376 OverlappedComponents = Pair.getSecond(); 9377 generateInfoForComponentList( 9378 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9379 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9380 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9381 IsFirstComponentList = false; 9382 } 9383 // Go through other elements without overlapped elements. 9384 for (const MapData &L : DeclComponentLists) { 9385 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9386 OpenMPMapClauseKind MapType; 9387 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9388 bool IsImplicit; 9389 const ValueDecl *Mapper; 9390 const Expr *VarRef; 9391 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9392 L; 9393 auto It = OverlappedData.find(&L); 9394 if (It == OverlappedData.end()) 9395 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9396 Components, CombinedInfo, PartialStruct, 9397 IsFirstComponentList, IsImplicit, Mapper, 9398 /*ForDeviceAddr=*/false, VD, VarRef); 9399 IsFirstComponentList = false; 9400 } 9401 } 9402 9403 /// Generate the default map information for a given capture \a CI, 9404 /// record field declaration \a RI and captured value \a CV. 9405 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9406 const FieldDecl &RI, llvm::Value *CV, 9407 MapCombinedInfoTy &CombinedInfo) const { 9408 bool IsImplicit = true; 9409 // Do the default mapping. 9410 if (CI.capturesThis()) { 9411 CombinedInfo.Exprs.push_back(nullptr); 9412 CombinedInfo.BasePointers.push_back(CV); 9413 CombinedInfo.Pointers.push_back(CV); 9414 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9415 CombinedInfo.Sizes.push_back( 9416 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9417 CGF.Int64Ty, /*isSigned=*/true)); 9418 // Default map type. 9419 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9420 } else if (CI.capturesVariableByCopy()) { 9421 const VarDecl *VD = CI.getCapturedVar(); 9422 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9423 CombinedInfo.BasePointers.push_back(CV); 9424 CombinedInfo.Pointers.push_back(CV); 9425 if (!RI.getType()->isAnyPointerType()) { 9426 // We have to signal to the runtime captures passed by value that are 9427 // not pointers. 9428 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9429 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9430 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9431 } else { 9432 // Pointers are implicitly mapped with a zero size and no flags 9433 // (other than first map that is added for all implicit maps). 
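        // e.g. (an illustrative sketch) for 'int *p' captured by copy, the
        // entry becomes <p, p, /*size=*/0, NONE> (TARGET_PARAM, and usually
        // IMPLICIT, are added below), so the runtime can substitute the
        // corresponding device address if the pointee is already mapped.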
9434         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9435         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9436       }
9437       auto I = FirstPrivateDecls.find(VD);
9438       if (I != FirstPrivateDecls.end())
9439         IsImplicit = I->getSecond();
9440     } else {
9441       assert(CI.capturesVariable() && "Expected captured reference.");
9442       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9443       QualType ElementType = PtrTy->getPointeeType();
9444       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9445           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9446       // The default map type for a scalar/complex type is 'to' because by
9447       // default the value doesn't have to be retrieved. For an aggregate
9448       // type, the default is 'tofrom'.
9449       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9450       const VarDecl *VD = CI.getCapturedVar();
9451       auto I = FirstPrivateDecls.find(VD);
9452       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9453       CombinedInfo.BasePointers.push_back(CV);
9454       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9455         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9456             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9457             AlignmentSource::Decl));
9458         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9459       } else {
9460         CombinedInfo.Pointers.push_back(CV);
9461       }
9462       if (I != FirstPrivateDecls.end())
9463         IsImplicit = I->getSecond();
9464     }
9465     // Every default map produces a single argument which is a target parameter.
9466     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9467 
9468     // Add a flag stating this is an implicit map.
9469     if (IsImplicit)
9470       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9471 
9472     // No user-defined mapper for default mapping.
9473     CombinedInfo.Mappers.push_back(nullptr);
9474   }
9475 };
9476 } // anonymous namespace
9477 
9478 static void emitNonContiguousDescriptor(
9479     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9480     CGOpenMPRuntime::TargetDataInfo &Info) {
9481   CodeGenModule &CGM = CGF.CGM;
9482   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9483       &NonContigInfo = CombinedInfo.NonContigInfo;
9484 
9485   // Build an array of struct descriptor_dim and then assign it to
9486   // offload_args.
9487   //
9488   // struct descriptor_dim {
9489   //   uint64_t offset;
9490   //   uint64_t count;
9491   //   uint64_t stride;
9492   // };
9493   ASTContext &C = CGF.getContext();
9494   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9495   RecordDecl *RD;
9496   RD = C.buildImplicitRecord("descriptor_dim");
9497   RD->startDefinition();
9498   addFieldToRecordDecl(C, RD, Int64Ty);
9499   addFieldToRecordDecl(C, RD, Int64Ty);
9500   addFieldToRecordDecl(C, RD, Int64Ty);
9501   RD->completeDefinition();
9502   QualType DimTy = C.getRecordType(RD);
9503 
9504   enum { OffsetFD = 0, CountFD, StrideFD };
9505   // We need two index variables here since the size of "Dims" is the same as
9506   // the size of Components; however, the sizes of offset, count, and stride
9507   // equal the number of non-contiguous base declarations.
9508   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9509     // Skip emitting IR if the dimension size is 1, since it cannot be
9510     // non-contiguous.
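    // A sketch of what the loop body below emits for one non-contiguous list
    // L whose dimension count is greater than 1:
    //   descriptor_dim dims[Dims[I]];
    //   dims[II] = {Offsets[L][RevIdx], Counts[L][RevIdx],
    //               Strides[L][RevIdx]};   // dimensions stored in reverse
    //   args[I] = &dims;                   // replaces the pointer entry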
9511 if (NonContigInfo.Dims[I] == 1) 9512 continue; 9513 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9514 QualType ArrayTy = 9515 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9516 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9517 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9518 unsigned RevIdx = EE - II - 1; 9519 LValue DimsLVal = CGF.MakeAddrLValue( 9520 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9521 // Offset 9522 LValue OffsetLVal = CGF.EmitLValueForField( 9523 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9524 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9525 // Count 9526 LValue CountLVal = CGF.EmitLValueForField( 9527 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9528 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9529 // Stride 9530 LValue StrideLVal = CGF.EmitLValueForField( 9531 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9532 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9533 } 9534 // args[I] = &dims 9535 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9536 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9537 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9538 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9539 Info.PointersArray, 0, I); 9540 Address PAddr = Address::deprecated(P, CGF.getPointerAlign()); 9541 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9542 ++L; 9543 } 9544 } 9545 9546 // Try to extract the base declaration from a `this->x` expression if possible. 9547 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9548 if (!E) 9549 return nullptr; 9550 9551 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9552 if (const MemberExpr *ME = 9553 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9554 return ME->getMemberDecl(); 9555 return nullptr; 9556 } 9557 9558 /// Emit a string constant containing the names of the values mapped to the 9559 /// offloading runtime library. 9560 llvm::Constant * 9561 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9562 MappableExprsHandler::MappingExprInfo &MapExprs) { 9563 9564 uint32_t SrcLocStrSize; 9565 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9566 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9567 9568 SourceLocation Loc; 9569 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9570 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9571 Loc = VD->getLocation(); 9572 else 9573 Loc = MapExprs.getMapExpr()->getExprLoc(); 9574 } else { 9575 Loc = MapExprs.getMapDecl()->getLocation(); 9576 } 9577 9578 std::string ExprName; 9579 if (MapExprs.getMapExpr()) { 9580 PrintingPolicy P(CGF.getContext().getLangOpts()); 9581 llvm::raw_string_ostream OS(ExprName); 9582 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9583 OS.flush(); 9584 } else { 9585 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9586 } 9587 9588 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9589 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9590 PLoc.getLine(), PLoc.getColumn(), 9591 SrcLocStrSize); 9592 } 9593 9594 /// Emit the arrays used to pass the captures and map information to the 9595 /// offloading runtime library. If there is no map or capture information, 9596 /// return nullptr by reference. 
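///
/// A sketch of the arrays built here (N = number of mapped items):
/// \code
///   void    *.offload_baseptrs[N]; // base address of each mapped item
///   void    *.offload_ptrs[N];     // begin address of each mapped section
///   int64_t  .offload_sizes[N];    // sizes in bytes (constant or runtime)
///   int64_t  .offload_maptypes[N]; // OMP_MAP_* flag words (a global)
///   void    *.offload_mappers[N];  // user-defined mapper functions or null
///   void    *.offload_mapnames[N]; // source names (only with debug info)
/// \endcode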
9597 static void emitOffloadingArrays( 9598 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9599 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9600 bool IsNonContiguous = false) { 9601 CodeGenModule &CGM = CGF.CGM; 9602 ASTContext &Ctx = CGF.getContext(); 9603 9604 // Reset the array information. 9605 Info.clearArrayInfo(); 9606 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9607 9608 if (Info.NumberOfPtrs) { 9609 // Detect if we have any capture size requiring runtime evaluation of the 9610 // size so that a constant array could be eventually used. 9611 9612 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9613 QualType PointerArrayType = Ctx.getConstantArrayType( 9614 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9615 /*IndexTypeQuals=*/0); 9616 9617 Info.BasePointersArray = 9618 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9619 Info.PointersArray = 9620 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9621 Address MappersArray = 9622 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9623 Info.MappersArray = MappersArray.getPointer(); 9624 9625 // If we don't have any VLA types or other types that require runtime 9626 // evaluation, we can use a constant array for the map sizes, otherwise we 9627 // need to fill up the arrays as we do for the pointers. 9628 QualType Int64Ty = 9629 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9630 SmallVector<llvm::Constant *> ConstSizes( 9631 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9632 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); 9633 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9634 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { 9635 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { 9636 if (IsNonContiguous && (CombinedInfo.Types[I] & 9637 MappableExprsHandler::OMP_MAP_NON_CONTIG)) 9638 ConstSizes[I] = llvm::ConstantInt::get( 9639 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); 9640 else 9641 ConstSizes[I] = CI; 9642 continue; 9643 } 9644 } 9645 RuntimeSizes.set(I); 9646 } 9647 9648 if (RuntimeSizes.all()) { 9649 QualType SizeArrayType = Ctx.getConstantArrayType( 9650 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9651 /*IndexTypeQuals=*/0); 9652 Info.SizesArray = 9653 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9654 } else { 9655 auto *SizesArrayInit = llvm::ConstantArray::get( 9656 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9657 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9658 auto *SizesArrayGbl = new llvm::GlobalVariable( 9659 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, 9660 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); 9661 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9662 if (RuntimeSizes.any()) { 9663 QualType SizeArrayType = Ctx.getConstantArrayType( 9664 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9665 /*IndexTypeQuals=*/0); 9666 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); 9667 llvm::Value *GblConstPtr = 9668 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9669 SizesArrayGbl, CGM.Int64Ty->getPointerTo()); 9670 CGF.Builder.CreateMemCpy( 9671 Buffer, 9672 Address(GblConstPtr, CGM.Int64Ty, 9673 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( 9674 /*DestWidth=*/64, /*Signed=*/false))), 9675 
CGF.getTypeSize(SizeArrayType)); 9676 Info.SizesArray = Buffer.getPointer(); 9677 } else { 9678 Info.SizesArray = SizesArrayGbl; 9679 } 9680 } 9681 9682 // The map types are always constant so we don't need to generate code to 9683 // fill arrays. Instead, we create an array constant. 9684 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9685 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9686 std::string MaptypesName = 9687 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9688 auto *MapTypesArrayGbl = 9689 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9690 Info.MapTypesArray = MapTypesArrayGbl; 9691 9692 // The information types are only built if there is debug information 9693 // requested. 9694 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9695 Info.MapNamesArray = llvm::Constant::getNullValue( 9696 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9697 } else { 9698 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9699 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9700 }; 9701 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9702 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9703 std::string MapnamesName = 9704 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9705 auto *MapNamesArrayGbl = 9706 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9707 Info.MapNamesArray = MapNamesArrayGbl; 9708 } 9709 9710 // If there's a present map type modifier, it must not be applied to the end 9711 // of a region, so generate a separate map type array in that case. 9712 if (Info.separateBeginEndCalls()) { 9713 bool EndMapTypesDiffer = false; 9714 for (uint64_t &Type : Mapping) { 9715 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9716 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9717 EndMapTypesDiffer = true; 9718 } 9719 } 9720 if (EndMapTypesDiffer) { 9721 MapTypesArrayGbl = 9722 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9723 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9724 } 9725 } 9726 9727 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9728 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9729 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9730 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9731 Info.BasePointersArray, 0, I); 9732 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9733 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9734 Address BPAddr = 9735 Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9736 CGF.Builder.CreateStore(BPVal, BPAddr); 9737 9738 if (Info.requiresDevicePointerInfo()) 9739 if (const ValueDecl *DevVD = 9740 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9741 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9742 9743 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9744 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9745 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9746 Info.PointersArray, 0, I); 9747 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9748 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9749 Address PAddr = 9750 Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9751 CGF.Builder.CreateStore(PVal, PAddr); 9752 9753 if (RuntimeSizes.test(I)) { 9754 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9755 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9756 Info.SizesArray, 9757 /*Idx0=*/0, 9758 /*Idx1=*/I); 9759 Address SAddr = 9760 Address::deprecated(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 9761 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9762 CGM.Int64Ty, 9763 /*isSigned=*/true), 9764 SAddr); 9765 } 9766 9767 // Fill up the mapper array. 9768 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9769 if (CombinedInfo.Mappers[I]) { 9770 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9771 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9772 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9773 Info.HasMapper = true; 9774 } 9775 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9776 CGF.Builder.CreateStore(MFunc, MAddr); 9777 } 9778 } 9779 9780 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9781 Info.NumberOfPtrs == 0) 9782 return; 9783 9784 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9785 } 9786 9787 namespace { 9788 /// Additional arguments for emitOffloadingArraysArgument function. 9789 struct ArgumentsOptions { 9790 bool ForEndCall = false; 9791 ArgumentsOptions() = default; 9792 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9793 }; 9794 } // namespace 9795 9796 /// Emit the arguments to be passed to the runtime library based on the 9797 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9798 /// ForEndCall, emit map types to be passed for the end of the region instead of 9799 /// the beginning. 9800 static void emitOffloadingArraysArgument( 9801 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9802 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9803 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9804 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9805 const ArgumentsOptions &Options = ArgumentsOptions()) { 9806 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9807 "expected region end call to runtime only when end call is separate"); 9808 CodeGenModule &CGM = CGF.CGM; 9809 if (Info.NumberOfPtrs) { 9810 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9811 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9812 Info.BasePointersArray, 9813 /*Idx0=*/0, /*Idx1=*/0); 9814 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9815 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9816 Info.PointersArray, 9817 /*Idx0=*/0, 9818 /*Idx1=*/0); 9819 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9820 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9821 /*Idx0=*/0, /*Idx1=*/0); 9822 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9823 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9824 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9825 : Info.MapTypesArray, 9826 /*Idx0=*/0, 9827 /*Idx1=*/0); 9828 9829 // Only emit the mapper information arrays if debug information is 9830 // requested. 
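    // (That is, the .offload_mapnames array; without debug info a null
    // pointer is passed in its place.)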
9831 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9832 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9833 else 9834 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9835 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9836 Info.MapNamesArray, 9837 /*Idx0=*/0, 9838 /*Idx1=*/0); 9839 // If there is no user-defined mapper, set the mapper array to nullptr to 9840 // avoid an unnecessary data privatization 9841 if (!Info.HasMapper) 9842 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9843 else 9844 MappersArrayArg = 9845 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9846 } else { 9847 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9848 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9849 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9850 MapTypesArrayArg = 9851 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9852 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9853 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9854 } 9855 } 9856 9857 /// Check for inner distribute directive. 9858 static const OMPExecutableDirective * 9859 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9860 const auto *CS = D.getInnermostCapturedStmt(); 9861 const auto *Body = 9862 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9863 const Stmt *ChildStmt = 9864 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9865 9866 if (const auto *NestedDir = 9867 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9868 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9869 switch (D.getDirectiveKind()) { 9870 case OMPD_target: 9871 if (isOpenMPDistributeDirective(DKind)) 9872 return NestedDir; 9873 if (DKind == OMPD_teams) { 9874 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9875 /*IgnoreCaptured=*/true); 9876 if (!Body) 9877 return nullptr; 9878 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9879 if (const auto *NND = 9880 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9881 DKind = NND->getDirectiveKind(); 9882 if (isOpenMPDistributeDirective(DKind)) 9883 return NND; 9884 } 9885 } 9886 return nullptr; 9887 case OMPD_target_teams: 9888 if (isOpenMPDistributeDirective(DKind)) 9889 return NestedDir; 9890 return nullptr; 9891 case OMPD_target_parallel: 9892 case OMPD_target_simd: 9893 case OMPD_target_parallel_for: 9894 case OMPD_target_parallel_for_simd: 9895 return nullptr; 9896 case OMPD_target_teams_distribute: 9897 case OMPD_target_teams_distribute_simd: 9898 case OMPD_target_teams_distribute_parallel_for: 9899 case OMPD_target_teams_distribute_parallel_for_simd: 9900 case OMPD_parallel: 9901 case OMPD_for: 9902 case OMPD_parallel_for: 9903 case OMPD_parallel_master: 9904 case OMPD_parallel_sections: 9905 case OMPD_for_simd: 9906 case OMPD_parallel_for_simd: 9907 case OMPD_cancel: 9908 case OMPD_cancellation_point: 9909 case OMPD_ordered: 9910 case OMPD_threadprivate: 9911 case OMPD_allocate: 9912 case OMPD_task: 9913 case OMPD_simd: 9914 case OMPD_tile: 9915 case OMPD_unroll: 9916 case OMPD_sections: 9917 case OMPD_section: 9918 case OMPD_single: 9919 case OMPD_master: 9920 case OMPD_critical: 9921 case OMPD_taskyield: 9922 case OMPD_barrier: 9923 case OMPD_taskwait: 9924 case OMPD_taskgroup: 9925 case OMPD_atomic: 9926 case OMPD_flush: 9927 case OMPD_depobj: 9928 case 
OMPD_scan: 9929 case OMPD_teams: 9930 case OMPD_target_data: 9931 case OMPD_target_exit_data: 9932 case OMPD_target_enter_data: 9933 case OMPD_distribute: 9934 case OMPD_distribute_simd: 9935 case OMPD_distribute_parallel_for: 9936 case OMPD_distribute_parallel_for_simd: 9937 case OMPD_teams_distribute: 9938 case OMPD_teams_distribute_simd: 9939 case OMPD_teams_distribute_parallel_for: 9940 case OMPD_teams_distribute_parallel_for_simd: 9941 case OMPD_target_update: 9942 case OMPD_declare_simd: 9943 case OMPD_declare_variant: 9944 case OMPD_begin_declare_variant: 9945 case OMPD_end_declare_variant: 9946 case OMPD_declare_target: 9947 case OMPD_end_declare_target: 9948 case OMPD_declare_reduction: 9949 case OMPD_declare_mapper: 9950 case OMPD_taskloop: 9951 case OMPD_taskloop_simd: 9952 case OMPD_master_taskloop: 9953 case OMPD_master_taskloop_simd: 9954 case OMPD_parallel_master_taskloop: 9955 case OMPD_parallel_master_taskloop_simd: 9956 case OMPD_requires: 9957 case OMPD_metadirective: 9958 case OMPD_unknown: 9959 default: 9960 llvm_unreachable("Unexpected directive."); 9961 } 9962 } 9963 9964 return nullptr; 9965 } 9966 9967 /// Emit the user-defined mapper function. The code generation follows the 9968 /// pattern in the example below. 9969 /// \code 9970 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9971 /// void *base, void *begin, 9972 /// int64_t size, int64_t type, 9973 /// void *name = nullptr) { 9974 /// // Allocate space for an array section first or add a base/begin for 9975 /// // pointer dereference. 9976 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9977 /// !maptype.IsDelete) 9978 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9979 /// size*sizeof(Ty), clearToFromMember(type)); 9980 /// // Map members. 9981 /// for (unsigned i = 0; i < size; i++) { 9982 /// // For each component specified by this mapper: 9983 /// for (auto c : begin[i]->all_components) { 9984 /// if (c.hasMapper()) 9985 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9986 /// c.arg_type, c.arg_name); 9987 /// else 9988 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9989 /// c.arg_begin, c.arg_size, c.arg_type, 9990 /// c.arg_name); 9991 /// } 9992 /// } 9993 /// // Delete the array section. 9994 /// if (size > 1 && maptype.IsDelete) 9995 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9996 /// size*sizeof(Ty), clearToFromMember(type)); 9997 /// } 9998 /// \endcode 9999 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 10000 CodeGenFunction *CGF) { 10001 if (UDMMap.count(D) > 0) 10002 return; 10003 ASTContext &C = CGM.getContext(); 10004 QualType Ty = D->getType(); 10005 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 10006 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10007 auto *MapperVarDecl = 10008 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 10009 SourceLocation Loc = D->getLocation(); 10010 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 10011 10012 // Prepare mapper function arguments and attributes. 
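// (Editor's illustration, not from the original source: a user-defined mapper
// such as
//   struct S { int len; double *data; };
//   #pragma omp declare mapper(id : struct S s) map(s.len, s.data[0:s.len])
// reaches this function as an OMPDeclareMapperDecl; the six parameters
// prepared below correspond to the rt_mapper_handle/base/begin/size/type/name
// arguments shown in the \code block above.)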
10013 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 10014 C.VoidPtrTy, ImplicitParamDecl::Other); 10015 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 10016 ImplicitParamDecl::Other); 10017 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 10018 C.VoidPtrTy, ImplicitParamDecl::Other); 10019 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 10020 ImplicitParamDecl::Other); 10021 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 10022 ImplicitParamDecl::Other); 10023 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 10024 ImplicitParamDecl::Other); 10025 FunctionArgList Args; 10026 Args.push_back(&HandleArg); 10027 Args.push_back(&BaseArg); 10028 Args.push_back(&BeginArg); 10029 Args.push_back(&SizeArg); 10030 Args.push_back(&TypeArg); 10031 Args.push_back(&NameArg); 10032 const CGFunctionInfo &FnInfo = 10033 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 10034 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 10035 SmallString<64> TyStr; 10036 llvm::raw_svector_ostream Out(TyStr); 10037 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 10038 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 10039 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 10040 Name, &CGM.getModule()); 10041 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 10042 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 10043 // Start the mapper function code generation. 10044 CodeGenFunction MapperCGF(CGM); 10045 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 10046 // Compute the starting and end addresses of array elements. 10047 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 10048 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 10049 C.getPointerType(Int64Ty), Loc); 10050 // Prepare common arguments for array initiation and deletion. 10051 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 10052 MapperCGF.GetAddrOfLocalVar(&HandleArg), 10053 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10054 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 10055 MapperCGF.GetAddrOfLocalVar(&BaseArg), 10056 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10057 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 10058 MapperCGF.GetAddrOfLocalVar(&BeginArg), 10059 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10060 // Convert the size in bytes into the number of array elements. 10061 Size = MapperCGF.Builder.CreateExactUDiv( 10062 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10063 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 10064 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 10065 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP( 10066 PtrBegin->getType()->getPointerElementType(), PtrBegin, Size); 10067 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 10068 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 10069 C.getPointerType(Int64Ty), Loc); 10070 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 10071 MapperCGF.GetAddrOfLocalVar(&NameArg), 10072 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10073 10074 // Emit array initiation if this is an array section and \p MapType indicates 10075 // that memory allocation is required. 
10076 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 10077 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 10078 MapName, ElementSize, HeadBB, /*IsInit=*/true); 10079 10080 // Emit a for loop to iterate through SizeArg of elements and map all of them. 10081 10082 // Emit the loop header block. 10083 MapperCGF.EmitBlock(HeadBB); 10084 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 10085 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 10086 // Evaluate whether the initial condition is satisfied. 10087 llvm::Value *IsEmpty = 10088 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 10089 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 10090 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 10091 10092 // Emit the loop body block. 10093 MapperCGF.EmitBlock(BodyBB); 10094 llvm::BasicBlock *LastBB = BodyBB; 10095 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 10096 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 10097 PtrPHI->addIncoming(PtrBegin, EntryBB); 10098 Address PtrCurrent = 10099 Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 10100 .getAlignment() 10101 .alignmentOfArrayElement(ElementSize)); 10102 // Privatize the declared variable of mapper to be the current array element. 10103 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 10104 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; }); 10105 (void)Scope.Privatize(); 10106 10107 // Get map clause information. Fill up the arrays with all mapped variables. 10108 MappableExprsHandler::MapCombinedInfoTy Info; 10109 MappableExprsHandler MEHandler(*D, MapperCGF); 10110 MEHandler.generateAllInfoForMapper(Info); 10111 10112 // Call the runtime API __tgt_mapper_num_components to get the number of 10113 // pre-existing components. 10114 llvm::Value *OffloadingArgs[] = {Handle}; 10115 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 10116 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10117 OMPRTL___tgt_mapper_num_components), 10118 OffloadingArgs); 10119 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 10120 PreviousSize, 10121 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 10122 10123 // Fill up the runtime mapper handle for all components. 10124 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 10125 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 10126 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10127 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 10128 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10129 llvm::Value *CurSizeArg = Info.Sizes[I]; 10130 llvm::Value *CurNameArg = 10131 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 10132 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 10133 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 10134 10135 // Extract the MEMBER_OF field from the map type. 10136 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 10137 llvm::Value *MemberMapType = 10138 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 10139 10140 // Combine the map type inherited from user-defined mapper with that 10141 // specified in the program. 
According to the OMP_MAP_TO and OMP_MAP_FROM 10142 // bits of the \a MapType, which is the input argument of the mapper 10143 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 10144 // bits of MemberMapType. 10145 // [OpenMP 5.0], 1.2.6. map-type decay. 10146 // | alloc | to | from | tofrom | release | delete 10147 // ---------------------------------------------------------- 10148 // alloc | alloc | alloc | alloc | alloc | release | delete 10149 // to | alloc | to | alloc | to | release | delete 10150 // from | alloc | alloc | from | from | release | delete 10151 // tofrom | alloc | to | from | tofrom | release | delete 10152 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 10153 MapType, 10154 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 10155 MappableExprsHandler::OMP_MAP_FROM)); 10156 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 10157 llvm::BasicBlock *AllocElseBB = 10158 MapperCGF.createBasicBlock("omp.type.alloc.else"); 10159 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 10160 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 10161 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 10162 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 10163 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 10164 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 10165 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 10166 MapperCGF.EmitBlock(AllocBB); 10167 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 10168 MemberMapType, 10169 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10170 MappableExprsHandler::OMP_MAP_FROM))); 10171 MapperCGF.Builder.CreateBr(EndBB); 10172 MapperCGF.EmitBlock(AllocElseBB); 10173 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 10174 LeftToFrom, 10175 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 10176 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 10177 // In case of to, clear OMP_MAP_FROM. 10178 MapperCGF.EmitBlock(ToBB); 10179 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 10180 MemberMapType, 10181 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 10182 MapperCGF.Builder.CreateBr(EndBB); 10183 MapperCGF.EmitBlock(ToElseBB); 10184 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 10185 LeftToFrom, 10186 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 10187 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 10188 // In case of from, clear OMP_MAP_TO. 10189 MapperCGF.EmitBlock(FromBB); 10190 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 10191 MemberMapType, 10192 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 10193 // In case of tofrom, do nothing. 10194 MapperCGF.EmitBlock(EndBB); 10195 LastBB = EndBB; 10196 llvm::PHINode *CurMapType = 10197 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 10198 CurMapType->addIncoming(AllocMapType, AllocBB); 10199 CurMapType->addIncoming(ToMapType, ToBB); 10200 CurMapType->addIncoming(FromMapType, FromBB); 10201 CurMapType->addIncoming(MemberMapType, ToElseBB); 10202 10203 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 10204 CurSizeArg, CurMapType, CurNameArg}; 10205 if (Info.Mappers[I]) { 10206 // Call the corresponding mapper function. 
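// (Editor's illustration, not part of the source: a member mapped with an
// explicit mapper in the declaration, e.g. map(mapper(deep), tofrom:
// s.ptr[0:n]), has a non-null Info.Mappers[I], so the nested mapper function
// is invoked below instead of __tgt_push_mapper_component.)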
10207 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10208 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10209 assert(MapperFunc && "Expected a valid mapper function to be available.");
10210 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10211 } else {
10212 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10213 // data structure.
10214 MapperCGF.EmitRuntimeCall(
10215 OMPBuilder.getOrCreateRuntimeFunction(
10216 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10217 OffloadingArgs);
10218 }
10219 }
10220
10221 // Update the pointer to point to the next element that needs to be mapped,
10222 // and check whether we have mapped all elements.
10223 llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10224 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10225 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10226 PtrPHI->addIncoming(PtrNext, LastBB);
10227 llvm::Value *IsDone =
10228 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10229 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10230 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10231
10232 MapperCGF.EmitBlock(ExitBB);
10233 // Emit array deletion if this is an array section and \p MapType indicates
10234 // that deletion is required.
10235 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10236 MapName, ElementSize, DoneBB, /*IsInit=*/false);
10237
10238 // Emit the function exit block.
10239 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10240 MapperCGF.FinishFunction();
10241 UDMMap.try_emplace(D, Fn);
10242 if (CGF) {
10243 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10244 Decls.second.push_back(D);
10245 }
10246 }
10247
10248 /// Emit the array initialization or deletion portion for user-defined mapper
10249 /// code generation. First, it evaluates whether an array section is mapped and
10250 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10251 /// true, and \a MapType indicates to not delete this array, array
10252 /// initialization code is generated. If \a IsInit is false, and \a MapType
10253 /// indicates to delete this array, array deletion code is generated.
10254 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10255 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10256 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10257 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10258 bool IsInit) {
10259 StringRef Prefix = IsInit ? ".init" : ".del";
10260
10261 // Evaluate if this is an array section.
10262 llvm::BasicBlock *BodyBB =
10263 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10264 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10265 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10266 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10267 MapType,
10268 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10269 llvm::Value *DeleteCond;
10270 llvm::Value *Cond;
10271 if (IsInit) {
10272 // base != begin?
10273 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10274 // IsPtrAndObj?
10275 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10276 MapType, 10277 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10278 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10279 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10280 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10281 DeleteCond = MapperCGF.Builder.CreateIsNull( 10282 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10283 } else { 10284 Cond = IsArray; 10285 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10286 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10287 } 10288 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10289 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10290 10291 MapperCGF.EmitBlock(BodyBB); 10292 // Get the array size by multiplying element size and element number (i.e., \p 10293 // Size). 10294 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10295 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10296 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10297 // memory allocation/deletion purpose only. 10298 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10299 MapType, 10300 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10301 MappableExprsHandler::OMP_MAP_FROM))); 10302 MapTypeArg = MapperCGF.Builder.CreateOr( 10303 MapTypeArg, 10304 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10305 10306 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10307 // data structure. 10308 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10309 ArraySize, MapTypeArg, MapName}; 10310 MapperCGF.EmitRuntimeCall( 10311 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10312 OMPRTL___tgt_push_mapper_component), 10313 OffloadingArgs); 10314 } 10315 10316 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10317 const OMPDeclareMapperDecl *D) { 10318 auto I = UDMMap.find(D); 10319 if (I != UDMMap.end()) 10320 return I->second; 10321 emitUserDefinedMapper(D); 10322 return UDMMap.lookup(D); 10323 } 10324 10325 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10326 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10327 llvm::Value *DeviceID, 10328 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10329 const OMPLoopDirective &D)> 10330 SizeEmitter) { 10331 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10332 const OMPExecutableDirective *TD = &D; 10333 // Get nested teams distribute kind directive, if any. 
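// (Editor's illustration, not in the original source: for
//   #pragma omp target
//   #pragma omp teams distribute parallel for
//   for (int i = 0; i < N; ++i) ...
// the distribute loop is nested inside the 'target' directive rather than
// being part of it, so it has to be located by the lookup below before its
// trip count can be emitted.)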
10334 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10335 TD = getNestedDistributeDirective(CGM.getContext(), D); 10336 if (!TD) 10337 return; 10338 const auto *LD = cast<OMPLoopDirective>(TD); 10339 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10340 PrePostActionTy &) { 10341 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10342 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10343 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10344 CGF.EmitRuntimeCall( 10345 OMPBuilder.getOrCreateRuntimeFunction( 10346 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10347 Args); 10348 } 10349 }; 10350 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10351 } 10352 10353 void CGOpenMPRuntime::emitTargetCall( 10354 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10355 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10356 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10357 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10358 const OMPLoopDirective &D)> 10359 SizeEmitter) { 10360 if (!CGF.HaveInsertPoint()) 10361 return; 10362 10363 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10364 CGM.getLangOpts().OpenMPOffloadMandatory; 10365 10366 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10367 10368 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10369 D.hasClausesOfKind<OMPNowaitClause>(); 10370 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10371 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10372 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10373 PrePostActionTy &) { 10374 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10375 }; 10376 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10377 10378 CodeGenFunction::OMPTargetDataInfo InputInfo; 10379 llvm::Value *MapTypesArray = nullptr; 10380 llvm::Value *MapNamesArray = nullptr; 10381 // Generate code for the host fallback function. 10382 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, 10383 &CS, OffloadingMandatory](CodeGenFunction &CGF) { 10384 if (OffloadingMandatory) { 10385 CGF.Builder.CreateUnreachable(); 10386 } else { 10387 if (RequiresOuterTask) { 10388 CapturedVars.clear(); 10389 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10390 } 10391 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10392 } 10393 }; 10394 // Fill up the pointer arrays and transfer execution to the device. 10395 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, 10396 &MapNamesArray, SizeEmitter, 10397 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10398 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10399 // Reverse offloading is not supported, so just execute on the host. 10400 FallbackGen(CGF); 10401 return; 10402 } 10403 10404 // On top of the arrays that were filled up, the target offloading call 10405 // takes as arguments the device id as well as the host pointer. The host 10406 // pointer is used by the runtime library to identify the current target 10407 // region, so it only has to be unique and not necessarily point to 10408 // anything. 
It could be the pointer to the outlined function that
10409 // implements the target region, but we do not use it, so the
10410 // compiler does not need to keep it around and can therefore inline the host
10411 // function if proven worthwhile during optimization.
10412
10413 // From this point on, we need to have an ID of the target region defined.
10414 assert(OutlinedFnID && "Invalid outlined function ID!");
10415 (void)OutlinedFnID;
10416
10417 // Emit device ID if any.
10418 llvm::Value *DeviceID;
10419 if (Device.getPointer()) {
10420 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10421 Device.getInt() == OMPC_DEVICE_device_num) &&
10422 "Expected device_num modifier.");
10423 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10424 DeviceID =
10425 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10426 } else {
10427 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10428 }
10429
10430 // Emit the number of elements in the offloading arrays.
10431 llvm::Value *PointerNum =
10432 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10433
10434 // Return value of the runtime offloading call.
10435 llvm::Value *Return;
10436
10437 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10438 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10439
10440 // Source location for the ident struct.
10441 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10442
10443 // Emit the trip count for the target loop-based directive.
10444 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10445
10446 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10447 // The target region is an outlined function launched by the runtime
10448 // via calls to __tgt_target() or __tgt_target_teams().
10449 //
10450 // __tgt_target() launches a target region with one team and one thread,
10451 // executing a serial region. This master thread may in turn launch
10452 // more threads within its team upon encountering a parallel region;
10453 // however, no additional teams can be launched on the device.
10454 //
10455 // __tgt_target_teams() launches a target region with one or more teams,
10456 // each with one or more threads. This call is required for target
10457 // constructs such as:
10458 // 'target teams'
10459 // 'target' / 'teams'
10460 // 'target teams distribute parallel for'
10461 // 'target parallel'
10462 // and so on.
10463 //
10464 // Note that on the host and CPU targets, the runtime implementations of
10465 // these calls simply call the outlined function without forking threads.
10466 // The outlined functions themselves have runtime calls to
10467 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10468 // the compiler in emitTeamsCall() and emitParallelCall().
10469 //
10470 // In contrast, on the NVPTX target, the implementation of
10471 // __tgt_target_teams() launches a GPU kernel with the requested number
10472 // of teams and threads, so no additional calls to the runtime are required.
10473 if (NumTeams) {
10474 // If we have NumTeams defined this means that we have an enclosed teams
10475 // region. Therefore we also expect to have NumThreads defined. These two
10476 // values should be defined in the presence of a teams directive,
10477 // regardless of having any clauses associated. If the user is using teams
10478 // but no clauses, these two values will be the default that should be
10479 // passed to the runtime library - a 32-bit integer with the value zero.
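// (Editor's note with an illustrative prototype inferred from the argument
// list built below; it is not quoted from the runtime headers:
//   int32_t __tgt_target_teams_mapper(
//       ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
//       void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
//       void **arg_names, void **arg_mappers, int32_t num_teams,
//       int32_t thread_limit);
// A zero return value indicates that the offload succeeded; a non-zero value
// triggers the host fallback emitted further down.)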
10480 assert(NumThreads && "Thread limit expression should be available along " 10481 "with number of teams."); 10482 SmallVector<llvm::Value *> OffloadingArgs = { 10483 RTLoc, 10484 DeviceID, 10485 OutlinedFnID, 10486 PointerNum, 10487 InputInfo.BasePointersArray.getPointer(), 10488 InputInfo.PointersArray.getPointer(), 10489 InputInfo.SizesArray.getPointer(), 10490 MapTypesArray, 10491 MapNamesArray, 10492 InputInfo.MappersArray.getPointer(), 10493 NumTeams, 10494 NumThreads}; 10495 if (HasNowait) { 10496 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10497 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10498 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10499 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10500 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10501 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10502 } 10503 Return = CGF.EmitRuntimeCall( 10504 OMPBuilder.getOrCreateRuntimeFunction( 10505 CGM.getModule(), HasNowait 10506 ? OMPRTL___tgt_target_teams_nowait_mapper 10507 : OMPRTL___tgt_target_teams_mapper), 10508 OffloadingArgs); 10509 } else { 10510 SmallVector<llvm::Value *> OffloadingArgs = { 10511 RTLoc, 10512 DeviceID, 10513 OutlinedFnID, 10514 PointerNum, 10515 InputInfo.BasePointersArray.getPointer(), 10516 InputInfo.PointersArray.getPointer(), 10517 InputInfo.SizesArray.getPointer(), 10518 MapTypesArray, 10519 MapNamesArray, 10520 InputInfo.MappersArray.getPointer()}; 10521 if (HasNowait) { 10522 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10523 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10524 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10525 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10526 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10527 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10528 } 10529 Return = CGF.EmitRuntimeCall( 10530 OMPBuilder.getOrCreateRuntimeFunction( 10531 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10532 : OMPRTL___tgt_target_mapper), 10533 OffloadingArgs); 10534 } 10535 10536 // Check the error code and execute the host version if required. 10537 llvm::BasicBlock *OffloadFailedBlock = 10538 CGF.createBasicBlock("omp_offload.failed"); 10539 llvm::BasicBlock *OffloadContBlock = 10540 CGF.createBasicBlock("omp_offload.cont"); 10541 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10542 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10543 10544 CGF.EmitBlock(OffloadFailedBlock); 10545 FallbackGen(CGF); 10546 10547 CGF.EmitBranch(OffloadContBlock); 10548 10549 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10550 }; 10551 10552 // Notify that the host version must be executed. 10553 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10554 FallbackGen(CGF); 10555 }; 10556 10557 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10558 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10559 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10560 // Fill up the arrays with all the captured variables. 10561 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10562 10563 // Get mappable expression information. 
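// (Editor's illustration, not from the source: for
//   #pragma omp target map(tofrom: a[0:n])
// the capture of 'a' contributes one CombinedInfo entry per map component in
// the loop below, while captured VLA bounds are passed by value as literal
// entries.)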
10564 MappableExprsHandler MEHandler(D, CGF);
10565 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10566 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10567
10568 auto RI = CS.getCapturedRecordDecl()->field_begin();
10569 auto *CV = CapturedVars.begin();
10570 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10571 CE = CS.capture_end();
10572 CI != CE; ++CI, ++RI, ++CV) {
10573 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10574 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10575
10576 // VLA sizes are passed to the outlined region by copy and do not have map
10577 // information associated.
10578 if (CI->capturesVariableArrayType()) {
10579 CurInfo.Exprs.push_back(nullptr);
10580 CurInfo.BasePointers.push_back(*CV);
10581 CurInfo.Pointers.push_back(*CV);
10582 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10583 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10584 // Copy to the device as an argument. No need to retrieve it.
10585 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10586 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10587 MappableExprsHandler::OMP_MAP_IMPLICIT);
10588 CurInfo.Mappers.push_back(nullptr);
10589 } else {
10590 // If we have any information in the map clause, we use it; otherwise we
10591 // just do a default mapping.
10592 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10593 if (!CI->capturesThis())
10594 MappedVarSet.insert(CI->getCapturedVar());
10595 else
10596 MappedVarSet.insert(nullptr);
10597 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10598 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10599 // Generate the correct mapping for variables captured by reference in
10600 // lambdas.
10601 if (CI->capturesVariable())
10602 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10603 CurInfo, LambdaPointers);
10604 }
10605 // We expect to have at least an element of information for this capture.
10606 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10607 "Non-existing map pointer for capture!");
10608 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10609 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10610 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10611 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10612 "Inconsistent map information sizes!");
10613
10614 // If there is an entry in PartialStruct it means we have a struct with
10615 // individual members mapped. Emit an extra combined entry.
10616 if (PartialStruct.Base.isValid()) {
10617 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10618 MEHandler.emitCombinedEntry(
10619 CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10620 !PartialStruct.PreliminaryMapData.BasePointers.empty());
10621 }
10622
10623 // We need to append the results of this capture to what we already have.
10624 CombinedInfo.append(CurInfo);
10625 }
10626 // Adjust MEMBER_OF flags for the lambda captures.
10627 MEHandler.adjustMemberOfForLambdaCaptures(
10628 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10629 CombinedInfo.Types);
10630 // Map any list items in a map clause that were not captured because they
10631 // weren't referenced within the construct.
10632 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10633
10634 TargetDataInfo Info;
10635 // Fill up the arrays and create the arguments.
10636 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10637 emitOffloadingArraysArgument(
10638 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10639 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10640 {/*ForEndCall=*/false});
10641
10642 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10643 InputInfo.BasePointersArray =
10644 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10645 InputInfo.PointersArray =
10646 Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10647 InputInfo.SizesArray =
10648 Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10649 InputInfo.MappersArray =
10650 Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10651 MapTypesArray = Info.MapTypesArray;
10652 MapNamesArray = Info.MapNamesArray;
10653 if (RequiresOuterTask)
10654 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10655 else
10656 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10657 };
10658
10659 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10660 CodeGenFunction &CGF, PrePostActionTy &) {
10661 if (RequiresOuterTask) {
10662 CodeGenFunction::OMPTargetDataInfo InputInfo;
10663 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10664 } else {
10665 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10666 }
10667 };
10668
10669 // If we have a target function ID it means that we need to support
10670 // offloading; otherwise, just execute on the host. We need to execute on the
10671 // host regardless of the conditional in the if clause if, e.g., the user does
10672 // not specify target triples.
10673 if (OutlinedFnID) {
10674 if (IfCond) {
10675 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10676 } else {
10677 RegionCodeGenTy ThenRCG(TargetThenGen);
10678 ThenRCG(CGF);
10679 }
10680 } else {
10681 RegionCodeGenTy ElseRCG(TargetElseGen);
10682 ElseRCG(CGF);
10683 }
10684 }
10685
10686 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10687 StringRef ParentName) {
10688 if (!S)
10689 return;
10690
10691 // Codegen OMP target directives that offload compute to the device.
10692 bool RequiresDeviceCodegen =
10693 isa<OMPExecutableDirective>(S) &&
10694 isOpenMPTargetExecutionDirective(
10695 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10696
10697 if (RequiresDeviceCodegen) {
10698 const auto &E = *cast<OMPExecutableDirective>(S);
10699 unsigned DeviceID;
10700 unsigned FileID;
10701 unsigned Line;
10702 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10703 FileID, Line);
10704
10705 // Is this a target region that should not be emitted as an entry point? If
10706 // so, just signal that we are done with this target region.
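// (Editor's note, not part of the original source: the DeviceID/FileID/
// ParentName/Line tuple computed above is what uniquely identifies a target
// region entry; the emitted offload entry names follow a scheme along the
// lines of "__omp_offloading_<device>_<file>_<parent>_l<line>".)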
10707 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10708 ParentName, Line)) 10709 return; 10710 10711 switch (E.getDirectiveKind()) { 10712 case OMPD_target: 10713 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10714 cast<OMPTargetDirective>(E)); 10715 break; 10716 case OMPD_target_parallel: 10717 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10718 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10719 break; 10720 case OMPD_target_teams: 10721 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10722 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10723 break; 10724 case OMPD_target_teams_distribute: 10725 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10726 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10727 break; 10728 case OMPD_target_teams_distribute_simd: 10729 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10730 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10731 break; 10732 case OMPD_target_parallel_for: 10733 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10734 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10735 break; 10736 case OMPD_target_parallel_for_simd: 10737 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10738 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10739 break; 10740 case OMPD_target_simd: 10741 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10742 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10743 break; 10744 case OMPD_target_teams_distribute_parallel_for: 10745 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10746 CGM, ParentName, 10747 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10748 break; 10749 case OMPD_target_teams_distribute_parallel_for_simd: 10750 CodeGenFunction:: 10751 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10752 CGM, ParentName, 10753 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10754 break; 10755 case OMPD_parallel: 10756 case OMPD_for: 10757 case OMPD_parallel_for: 10758 case OMPD_parallel_master: 10759 case OMPD_parallel_sections: 10760 case OMPD_for_simd: 10761 case OMPD_parallel_for_simd: 10762 case OMPD_cancel: 10763 case OMPD_cancellation_point: 10764 case OMPD_ordered: 10765 case OMPD_threadprivate: 10766 case OMPD_allocate: 10767 case OMPD_task: 10768 case OMPD_simd: 10769 case OMPD_tile: 10770 case OMPD_unroll: 10771 case OMPD_sections: 10772 case OMPD_section: 10773 case OMPD_single: 10774 case OMPD_master: 10775 case OMPD_critical: 10776 case OMPD_taskyield: 10777 case OMPD_barrier: 10778 case OMPD_taskwait: 10779 case OMPD_taskgroup: 10780 case OMPD_atomic: 10781 case OMPD_flush: 10782 case OMPD_depobj: 10783 case OMPD_scan: 10784 case OMPD_teams: 10785 case OMPD_target_data: 10786 case OMPD_target_exit_data: 10787 case OMPD_target_enter_data: 10788 case OMPD_distribute: 10789 case OMPD_distribute_simd: 10790 case OMPD_distribute_parallel_for: 10791 case OMPD_distribute_parallel_for_simd: 10792 case OMPD_teams_distribute: 10793 case OMPD_teams_distribute_simd: 10794 case OMPD_teams_distribute_parallel_for: 10795 case OMPD_teams_distribute_parallel_for_simd: 10796 case OMPD_target_update: 10797 case OMPD_declare_simd: 10798 case OMPD_declare_variant: 10799 case OMPD_begin_declare_variant: 10800 case OMPD_end_declare_variant: 10801 case OMPD_declare_target: 10802 case OMPD_end_declare_target: 10803 case OMPD_declare_reduction: 10804 case OMPD_declare_mapper: 10805 case OMPD_taskloop: 
10806 case OMPD_taskloop_simd:
10807 case OMPD_master_taskloop:
10808 case OMPD_master_taskloop_simd:
10809 case OMPD_parallel_master_taskloop:
10810 case OMPD_parallel_master_taskloop_simd:
10811 case OMPD_requires:
10812 case OMPD_metadirective:
10813 case OMPD_unknown:
10814 default:
10815 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10816 }
10817 return;
10818 }
10819
10820 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10821 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10822 return;
10823
10824 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10825 return;
10826 }
10827
10828 // If this is a lambda function, look into its body.
10829 if (const auto *L = dyn_cast<LambdaExpr>(S))
10830 S = L->getBody();
10831
10832 // Keep looking for target regions recursively.
10833 for (const Stmt *II : S->children())
10834 scanForTargetRegionsFunctions(II, ParentName);
10835 }
10836
10837 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10838 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10839 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10840 if (!DevTy)
10841 return false;
10842 // Do not emit device_type(nohost) functions for the host.
10843 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10844 return true;
10845 // Do not emit device_type(host) functions for the device.
10846 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10847 return true;
10848 return false;
10849 }
10850
10851 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10852 // If emitting code for the host, we do not process FD here. Instead we do
10853 // the normal code generation.
10854 if (!CGM.getLangOpts().OpenMPIsDevice) {
10855 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10856 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10857 CGM.getLangOpts().OpenMPIsDevice))
10858 return true;
10859 return false;
10860 }
10861
10862 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10863 // Try to detect target regions in the function.
10864 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10865 StringRef Name = CGM.getMangledName(GD);
10866 scanForTargetRegionsFunctions(FD->getBody(), Name);
10867 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10868 CGM.getLangOpts().OpenMPIsDevice))
10869 return true;
10870 }
10871
10872 // Do not emit the function if it is not marked as declare target.
10873 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10874 AlreadyEmittedTargetDecls.count(VD) == 0;
10875 }
10876
10877 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10878 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10879 CGM.getLangOpts().OpenMPIsDevice))
10880 return true;
10881
10882 if (!CGM.getLangOpts().OpenMPIsDevice)
10883 return false;
10884
10885 // Check if there are Ctors/Dtors in this declaration and look for target
10886 // regions in it. We use the complete variant to produce the kernel name
10887 // mangling.
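// (Editor's illustration, not from the source: a target region inside a
// constructor of a declare-target global, e.g.
//   struct T { T() { /* contains '#pragma omp target' */ } };
//   #pragma omp declare target
//   T t;
//   #pragma omp end declare target
// must still be emitted for the device, which is why the constructor and
// destructor bodies are scanned below.)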
10888 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10889 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10890 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10891 StringRef ParentName =
10892 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10893 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10894 }
10895 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10896 StringRef ParentName =
10897 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10898 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10899 }
10900 }
10901
10902 // Do not emit the variable if it is not marked as declare target.
10903 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10904 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10905 cast<VarDecl>(GD.getDecl()));
10906 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10907 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10908 HasRequiresUnifiedSharedMemory)) {
10909 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10910 return true;
10911 }
10912 return false;
10913 }
10914
10915 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10916 llvm::Constant *Addr) {
10917 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10918 !CGM.getLangOpts().OpenMPIsDevice)
10919 return;
10920
10921 // If we have host/nohost variables, they do not need to be registered.
10922 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10923 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10924 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10925 return;
10926
10927 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10928 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10929 if (!Res) {
10930 if (CGM.getLangOpts().OpenMPIsDevice) {
10931 // Register non-target variables being emitted in device code (debug info
10932 // may cause this).
10933 StringRef VarName = CGM.getMangledName(VD);
10934 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10935 }
10936 return;
10937 }
10938 // Register declare target variables.
10939 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10940 StringRef VarName;
10941 CharUnits VarSize;
10942 llvm::GlobalValue::LinkageTypes Linkage;
10943
10944 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10945 !HasRequiresUnifiedSharedMemory) {
10946 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10947 VarName = CGM.getMangledName(VD);
10948 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10949 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10950 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10951 } else {
10952 VarSize = CharUnits::Zero();
10953 }
10954 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10955 // Temporary solution to prevent optimizations of the internal variables.
10956 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10957 // Do not create a "ref-variable" if the original is not also available
10958 // on the host.
10959 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10960 return; 10961 std::string RefName = getName({VarName, "ref"}); 10962 if (!CGM.GetGlobalValue(RefName)) { 10963 llvm::Constant *AddrRef = 10964 getOrCreateInternalVariable(Addr->getType(), RefName); 10965 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10966 GVAddrRef->setConstant(/*Val=*/true); 10967 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10968 GVAddrRef->setInitializer(Addr); 10969 CGM.addCompilerUsedGlobal(GVAddrRef); 10970 } 10971 } 10972 } else { 10973 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10974 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10975 HasRequiresUnifiedSharedMemory)) && 10976 "Declare target attribute must link or to with unified memory."); 10977 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10978 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10979 else 10980 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10981 10982 if (CGM.getLangOpts().OpenMPIsDevice) { 10983 VarName = Addr->getName(); 10984 Addr = nullptr; 10985 } else { 10986 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10987 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10988 } 10989 VarSize = CGM.getPointerSize(); 10990 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10991 } 10992 10993 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10994 VarName, Addr, VarSize, Flags, Linkage); 10995 } 10996 10997 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10998 if (isa<FunctionDecl>(GD.getDecl()) || 10999 isa<OMPDeclareReductionDecl>(GD.getDecl())) 11000 return emitTargetFunctions(GD); 11001 11002 return emitTargetGlobalVariable(GD); 11003 } 11004 11005 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 11006 for (const VarDecl *VD : DeferredGlobalVariables) { 11007 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 11008 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 11009 if (!Res) 11010 continue; 11011 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 11012 !HasRequiresUnifiedSharedMemory) { 11013 CGM.EmitGlobal(VD); 11014 } else { 11015 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 11016 (*Res == OMPDeclareTargetDeclAttr::MT_To && 11017 HasRequiresUnifiedSharedMemory)) && 11018 "Expected link clause or to clause with unified memory."); 11019 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 11020 } 11021 } 11022 } 11023 11024 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 11025 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 11026 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 11027 " Expected target-based directive."); 11028 } 11029 11030 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 11031 for (const OMPClause *Clause : D->clauselists()) { 11032 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 11033 HasRequiresUnifiedSharedMemory = true; 11034 } else if (const auto *AC = 11035 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 11036 switch (AC->getAtomicDefaultMemOrderKind()) { 11037 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 11038 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 11039 break; 11040 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 11041 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 11042 break; 11043 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 11044 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 11045 break; 11046 case 
OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11047 break;
11048 }
11049 }
11050 }
11051 }
11052
11053 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11054 return RequiresAtomicOrdering;
11055 }
11056
11057 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11058 LangAS &AS) {
11059 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11060 return false;
11061 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11062 switch (A->getAllocatorType()) {
11063 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11064 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11065 // Not supported, fallback to the default mem space.
11066 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11067 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11068 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11069 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11070 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11071 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11072 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11073 AS = LangAS::Default;
11074 return true;
11075 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11076 llvm_unreachable("Expected predefined allocator for variables with "
11077 "static storage.");
11078 }
11079 return false;
11080 }
11081
11082 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11083 return HasRequiresUnifiedSharedMemory;
11084 }
11085
11086 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11087 CodeGenModule &CGM)
11088 : CGM(CGM) {
11089 if (CGM.getLangOpts().OpenMPIsDevice) {
11090 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11091 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11092 }
11093 }
11094
11095 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11096 if (CGM.getLangOpts().OpenMPIsDevice)
11097 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11098 }
11099
11100 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11101 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11102 return true;
11103
11104 const auto *D = cast<FunctionDecl>(GD.getDecl());
11105 // Do not emit the function if it is marked as declare target, as it was
11106 // already emitted.
11107 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11108 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11109 if (auto *F = dyn_cast_or_null<llvm::Function>(
11110 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11111 return !F->isDeclaration();
11112 return false;
11113 }
11114 return true;
11115 }
11116
11117 return !AlreadyEmittedTargetDecls.insert(D).second;
11118 }
11119
11120 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11121 // If we don't have entries or if we are emitting code for the device, we
11122 // don't need to do anything.
11123 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11124 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11125 (OffloadEntriesInfoManager.empty() &&
11126 !HasEmittedDeclareTargetRegion &&
11127 !HasEmittedTargetRegion))
11128 return nullptr;
11129
11130 // Create and register the function that handles the requires directives.
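// (Editor's IR-level sketch, not in the original source: assuming the usual
// '.' separator used by getName, the function created below is roughly
//   define internal void @omp_offloading.requires_reg() {
//     call void @__tgt_register_requires(i64 <flags>)
//     ret void
//   }
// and is registered so that it can run at startup, before any offloading
// entry is used.)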
11131 ASTContext &C = CGM.getContext();
11132
11133 llvm::Function *RequiresRegFn;
11134 {
11135 CodeGenFunction CGF(CGM);
11136 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11137 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11138 std::string ReqName = getName({"omp_offloading", "requires_reg"});
11139 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11140 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11141 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11142 // TODO: check for other requires clauses.
11143 // The requires directive takes effect only when a target region is
11144 // present in the compilation unit. Otherwise it is ignored and not
11145 // passed to the runtime. This prevents the runtime from throwing an
11146 // error for mismatching requires clauses across compilation units that
11147 // don't contain at least one target region.
11148 assert((HasEmittedTargetRegion ||
11149 HasEmittedDeclareTargetRegion ||
11150 !OffloadEntriesInfoManager.empty()) &&
11151 "Target or declare target region expected.");
11152 if (HasRequiresUnifiedSharedMemory)
11153 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11154 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11155 CGM.getModule(), OMPRTL___tgt_register_requires),
11156 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11157 CGF.FinishFunction();
11158 }
11159 return RequiresRegFn;
11160 }
11161
11162 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11163 const OMPExecutableDirective &D,
11164 SourceLocation Loc,
11165 llvm::Function *OutlinedFn,
11166 ArrayRef<llvm::Value *> CapturedVars) {
11167 if (!CGF.HaveInsertPoint())
11168 return;
11169
11170 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11171 CodeGenFunction::RunCleanupsScope Scope(CGF);
11172
11173 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11174 llvm::Value *Args[] = {
11175 RTLoc,
11176 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11177 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11178 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11179 RealArgs.append(std::begin(Args), std::end(Args));
11180 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11181
11182 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11183 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11184 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11185 }
11186
11187 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11188 const Expr *NumTeams,
11189 const Expr *ThreadLimit,
11190 SourceLocation Loc) {
11191 if (!CGF.HaveInsertPoint())
11192 return;
11193
11194 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11195
11196 llvm::Value *NumTeamsVal =
11197 NumTeams
11198 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11199 CGF.CGM.Int32Ty, /* isSigned = */ true)
11200 : CGF.Builder.getInt32(0);
11201
11202 llvm::Value *ThreadLimitVal =
11203 ThreadLimit
11204 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11205 CGF.CGM.Int32Ty, /* isSigned = */ true)
11206 : CGF.Builder.getInt32(0);
11207
11208 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit).
11209 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11210 ThreadLimitVal};
11211 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11212 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11213 PushNumTeamsArgs);
11214 }
11215
11216 void CGOpenMPRuntime::emitTargetDataCalls(
11217 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11218 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11219 if (!CGF.HaveInsertPoint())
11220 return;
11221
11222 // Action used to replace the default codegen action and turn privatization
11223 // off.
11224 PrePostActionTy NoPrivAction;
11225
11226 // Generate the code for the opening of the data environment. Capture all the
11227 // arguments of the runtime call by reference because they are used in the
11228 // closing of the region.
11229 auto &&BeginThenGen = [this, &D, Device, &Info,
11230 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11231 // Fill up the arrays with all the mapped variables.
11232 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11233
11234 // Get map clause information.
11235 MappableExprsHandler MEHandler(D, CGF);
11236 MEHandler.generateAllInfo(CombinedInfo);
11237
11238 // Fill up the arrays and create the arguments.
11239 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11240 /*IsNonContiguous=*/true);
11241
11242 llvm::Value *BasePointersArrayArg = nullptr;
11243 llvm::Value *PointersArrayArg = nullptr;
11244 llvm::Value *SizesArrayArg = nullptr;
11245 llvm::Value *MapTypesArrayArg = nullptr;
11246 llvm::Value *MapNamesArrayArg = nullptr;
11247 llvm::Value *MappersArrayArg = nullptr;
11248 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11249 SizesArrayArg, MapTypesArrayArg,
11250 MapNamesArrayArg, MappersArrayArg, Info);
11251
11252 // Emit device ID if any.
11253 llvm::Value *DeviceID = nullptr;
11254 if (Device) {
11255 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11256 CGF.Int64Ty, /*isSigned=*/true);
11257 } else {
11258 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11259 }
11260
11261 // Emit the number of elements in the offloading arrays.
11262 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11263
11264 // Source location for the ident struct.
11265 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11266
11267 llvm::Value *OffloadingArgs[] = {RTLoc,
11268 DeviceID,
11269 PointerNum,
11270 BasePointersArrayArg,
11271 PointersArrayArg,
11272 SizesArrayArg,
11273 MapTypesArrayArg,
11274 MapNamesArrayArg,
11275 MappersArrayArg};
11276 CGF.EmitRuntimeCall(
11277 OMPBuilder.getOrCreateRuntimeFunction(
11278 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11279 OffloadingArgs);
11280
11281 // If device pointer privatization is required, emit the body of the region
11282 // here. It will have to be duplicated: with and without privatization.
11283 if (!Info.CaptureDeviceAddrMap.empty())
11284 CodeGen(CGF);
11285 };
11286
11287 // Generate code for the closing of the data region.
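// (Editor's illustration, not part of the source: for
//   #pragma omp target data map(tofrom: a[0:n])
//   { ...body... }
// BeginThenGen above emits __tgt_target_data_begin_mapper before the body,
// and the matching __tgt_target_data_end_mapper is emitted below when the
// region closes.)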
  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
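  // As a sketch of what follows, each standalone directive maps to one
  // runtime entry point, e.g. (example clauses assumed):
  //   #pragma omp target enter data map(to: x)  -> __tgt_target_data_begin_mapper
  //   #pragma omp target exit data map(from: x) -> __tgt_target_data_end_mapper
  //   #pragma omp target update to(x)           -> __tgt_target_data_update_mapper
  // with the *_nowait_mapper variants selected when a 'nowait' clause is
  // present.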
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
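// A worked example of the x86 mangling above (function name assumed for
// illustration):
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// has CDT = double (64 bits), so with no simdlen the VLEN per ISA is
// sizeof(vector_register) / sizeof(CDT):
//   SSE:    128/64 = 2  -> "_ZGVbN2v_foo"
//   AVX:    256/64 = 4  -> "_ZGVcN4v_foo"
//   AVX2:   256/64 = 4  -> "_ZGVdN4v_foo"
//   AVX512: 512/64 = 8  -> "_ZGVeN8v_foo"
// An explicit simdlen(N) would replace the computed VLEN with N.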
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for references marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers.

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types up to 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
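// For example (function name assumed for illustration), a scalar function
// with NDS == 32, such as
//   #pragma omp declare simd notinbranch
//   float bar(float x);
// falls into 'case 32' above and gets the two Advanced SIMD variants
// "_ZGVnN2v_bar" and "_ZGVnN4v_bar" (2 and 4 lanes of the 32-bit type).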
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
11962 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11963 OutputBecomesInput, Fn); 11964 } else { 11965 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11966 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11967 // two vector names depending on the use of the clause 11968 // `[not]inbranch`. 11969 switch (State) { 11970 case OMPDeclareSimdDeclAttr::BS_Undefined: 11971 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11972 OutputBecomesInput, Fn); 11973 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11974 OutputBecomesInput, Fn); 11975 break; 11976 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11977 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11978 OutputBecomesInput, Fn); 11979 break; 11980 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11981 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11982 OutputBecomesInput, Fn); 11983 break; 11984 } 11985 } 11986 } 11987 } 11988 11989 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11990 llvm::Function *Fn) { 11991 ASTContext &C = CGM.getContext(); 11992 FD = FD->getMostRecentDecl(); 11993 // Map params to their positions in function decl. 11994 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11995 if (isa<CXXMethodDecl>(FD)) 11996 ParamPositions.try_emplace(FD, 0); 11997 unsigned ParamPos = ParamPositions.size(); 11998 for (const ParmVarDecl *P : FD->parameters()) { 11999 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 12000 ++ParamPos; 12001 } 12002 while (FD) { 12003 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 12004 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 12005 // Mark uniform parameters. 12006 for (const Expr *E : Attr->uniforms()) { 12007 E = E->IgnoreParenImpCasts(); 12008 unsigned Pos; 12009 if (isa<CXXThisExpr>(E)) { 12010 Pos = ParamPositions[FD]; 12011 } else { 12012 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12013 ->getCanonicalDecl(); 12014 Pos = ParamPositions[PVD]; 12015 } 12016 ParamAttrs[Pos].Kind = Uniform; 12017 } 12018 // Get alignment info. 12019 auto *NI = Attr->alignments_begin(); 12020 for (const Expr *E : Attr->aligneds()) { 12021 E = E->IgnoreParenImpCasts(); 12022 unsigned Pos; 12023 QualType ParmTy; 12024 if (isa<CXXThisExpr>(E)) { 12025 Pos = ParamPositions[FD]; 12026 ParmTy = E->getType(); 12027 } else { 12028 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12029 ->getCanonicalDecl(); 12030 Pos = ParamPositions[PVD]; 12031 ParmTy = PVD->getType(); 12032 } 12033 ParamAttrs[Pos].Alignment = 12034 (*NI) 12035 ? (*NI)->EvaluateKnownConstInt(C) 12036 : llvm::APSInt::getUnsigned( 12037 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12038 .getQuantity()); 12039 ++NI; 12040 } 12041 // Mark linear parameters. 12042 auto *SI = Attr->steps_begin(); 12043 auto *MI = Attr->modifiers_begin(); 12044 for (const Expr *E : Attr->linears()) { 12045 E = E->IgnoreParenImpCasts(); 12046 unsigned Pos; 12047 // Rescaling factor needed to compute the linear parameter 12048 // value in the mangled name. 
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
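// End-to-end sketch of the clause handling above (declaration assumed for
// illustration):
//   #pragma omp declare simd uniform(p) linear(i) simdlen(4) notinbranch
//   float add(float *p, int i);
// marks p as Uniform ('u') and i as Linear with stride 1 ('l', the step is
// omitted when it is 1), so on x86 the parameter sequence is "ul" and one of
// the attributes added is "_ZGVbN4ul_add" (plus the AVX/AVX2/AVX512
// spellings).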
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
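  // As an example (loop bounds assumed for illustration), '#pragma omp for
  // ordered(2)' over iteration counts N0 and N1 produces dims[2], where the
  // loop below sets dims[I].up to the Ith iteration count and dims[I].st to
  // 1; dims[I].lo stays 0 from the null-initialization above.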
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
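// For illustration of the two entry points above (clauses assumed):
//   #pragma omp ordered depend(source)     -> __kmpc_doacross_post(loc, gtid, cnts)
//   #pragma omp ordered depend(sink: i-1)  -> __kmpc_doacross_wait(loc, gtid, cnts)
// where 'cnts' holds the dependence vector converted to kmp_int64.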
must be valid."); 12247 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12248 12249 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12250 if (Fn->doesNotThrow()) { 12251 CGF.EmitNounwindRuntimeCall(Fn, Args); 12252 return; 12253 } 12254 } 12255 CGF.EmitRuntimeCall(Callee, Args); 12256 } 12257 12258 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12259 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12260 ArrayRef<llvm::Value *> Args) const { 12261 emitCall(CGF, Loc, OutlinedFn, Args); 12262 } 12263 12264 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12265 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12266 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12267 HasEmittedDeclareTargetRegion = true; 12268 } 12269 12270 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12271 const VarDecl *NativeParam, 12272 const VarDecl *TargetParam) const { 12273 return CGF.GetAddrOfLocalVar(NativeParam); 12274 } 12275 12276 /// Return allocator value from expression, or return a null allocator (default 12277 /// when no allocator specified). 12278 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12279 const Expr *Allocator) { 12280 llvm::Value *AllocVal; 12281 if (Allocator) { 12282 AllocVal = CGF.EmitScalarExpr(Allocator); 12283 // According to the standard, the original allocator type is a enum 12284 // (integer). Convert to pointer type, if required. 12285 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12286 CGF.getContext().VoidPtrTy, 12287 Allocator->getExprLoc()); 12288 } else { 12289 // If no allocator specified, it defaults to the null allocator. 12290 AllocVal = llvm::Constant::getNullValue( 12291 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12292 } 12293 return AllocVal; 12294 } 12295 12296 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12297 const VarDecl *VD) { 12298 if (!VD) 12299 return Address::invalid(); 12300 Address UntiedAddr = Address::invalid(); 12301 Address UntiedRealAddr = Address::invalid(); 12302 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12303 if (It != FunctionToUntiedTaskStackMap.end()) { 12304 const UntiedLocalVarsAddressesMap &UntiedData = 12305 UntiedLocalVarsStack[It->second]; 12306 auto I = UntiedData.find(VD); 12307 if (I != UntiedData.end()) { 12308 UntiedAddr = I->second.first; 12309 UntiedRealAddr = I->second.second; 12310 } 12311 } 12312 const VarDecl *CVD = VD->getCanonicalDecl(); 12313 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12314 // Use the default allocation. 
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
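    // Sketch of the net effect (allocator name assumed for illustration):
    // for
    //   int a;
    //   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
    // the variable's storage becomes, roughly,
    //   void *p = __kmpc_alloc(gtid, sizeof(int), allocator);
    // (or __kmpc_aligned_alloc when an 'align' modifier is present), and the
    // cleanup pushed below pairs it with __kmpc_free(gtid, p, allocator).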
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD),
                                     VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
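  // Semantics being implemented, for a loop like (variable names assumed for
  // illustration):
  //   #pragma omp parallel for lastprivate(conditional: a)
  //   for (int i = 0; i < n; ++i)
  //     if (cond(i)) a = f(i);
  // the value of 'a' after the loop must come from the highest iteration that
  // actually assigned it, hence the (last_iv, last_a) pair maintained under a
  // critical section below.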
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // Check (last_iv <= iv) whether the variable needs updating and, if so,
    // store the new value in the global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for the unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
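
// Each lastprivate conditional registered in an outer region is wrapped in an
// implicit record (built in emitLastprivateConditionalInit); schematically, as
// a sketch rather than real C++:
//
//   struct lastprivate.conditional {
//     decltype(a) a; // the private copy
//     char Fired;    // set to 1 when an inner region writes 'a'
//   };
//
// so the shared check below only needs to test the Fired flag.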

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
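
// Once the region finishes, the helper below copies the winning value back
// into the private copy; roughly (an illustrative sketch):
//
//   if (<global last_a exists>)
//     priv_a = last_a;
//
// If the global was never created, the variable was never conditionally
// updated and the private copy is left untouched.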

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
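
// Everything below stubs out CGOpenMPSIMDRuntime, the runtime used under
// -fopenmp-simd. In SIMD-only mode only 'simd' constructs are honored, so
// these entry points are not expected to be reachable; each one traps with
// llvm_unreachable to catch misuse early.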

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
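
// emitReduction is the one entry point with real work in SIMD-only mode: the
// reduction degenerates to its serial form, roughly (an illustrative sketch)
//
//   a = a <op> priv_a;
//
// emitted once per variable by delegating to the base implementation with
// Options.SimpleReduction set.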

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}