1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/SmallBitVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/Bitcode/BitcodeReader.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/GlobalValue.h" 38 #include "llvm/IR/InstrTypes.h" 39 #include "llvm/IR/Value.h" 40 #include "llvm/Support/AtomicOrdering.h" 41 #include "llvm/Support/Format.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <cassert> 44 #include <numeric> 45 46 using namespace clang; 47 using namespace CodeGen; 48 using namespace llvm::omp; 49 50 namespace { 51 /// Base class for handling code generation inside OpenMP regions. 52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 53 public: 54 /// Kinds of OpenMP regions used in codegen. 
55 enum CGOpenMPRegionKind { 56 /// Region with outlined function for standalone 'parallel' 57 /// directive. 58 ParallelOutlinedRegion, 59 /// Region with outlined function for standalone 'task' directive. 60 TaskOutlinedRegion, 61 /// Region for constructs that do not require function outlining, 62 /// like 'for', 'sections', 'atomic' etc. directives. 63 InlinedRegion, 64 /// Region with outlined function for standalone 'target' directive. 65 TargetRegion, 66 }; 67 68 CGOpenMPRegionInfo(const CapturedStmt &CS, 69 const CGOpenMPRegionKind RegionKind, 70 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 71 bool HasCancel) 72 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 73 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 74 75 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 76 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 77 bool HasCancel) 78 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 79 Kind(Kind), HasCancel(HasCancel) {} 80 81 /// Get a variable or parameter for storing global thread id 82 /// inside OpenMP construct. 83 virtual const VarDecl *getThreadIDVariable() const = 0; 84 85 /// Emit the captured statement body. 86 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 87 88 /// Get an LValue for the current ThreadID variable. 89 /// \return LValue for thread id variable. This LValue always has type int32*. 
90 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 91 92 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 93 94 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 95 96 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 97 98 bool hasCancel() const { return HasCancel; } 99 100 static bool classof(const CGCapturedStmtInfo *Info) { 101 return Info->getKind() == CR_OpenMP; 102 } 103 104 ~CGOpenMPRegionInfo() override = default; 105 106 protected: 107 CGOpenMPRegionKind RegionKind; 108 RegionCodeGenTy CodeGen; 109 OpenMPDirectiveKind Kind; 110 bool HasCancel; 111 }; 112 113 /// API for captured statement code generation in OpenMP constructs. 114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 115 public: 116 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 117 const RegionCodeGenTy &CodeGen, 118 OpenMPDirectiveKind Kind, bool HasCancel, 119 StringRef HelperName) 120 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 121 HasCancel), 122 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 123 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 124 } 125 126 /// Get a variable or parameter for storing global thread id 127 /// inside OpenMP construct. 128 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 129 130 /// Get the name of the capture helper. 131 StringRef getHelperName() const override { return HelperName; } 132 133 static bool classof(const CGCapturedStmtInfo *Info) { 134 return CGOpenMPRegionInfo::classof(Info) && 135 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 136 ParallelOutlinedRegion; 137 } 138 139 private: 140 /// A variable or parameter storing global thread id for OpenMP 141 /// constructs. 142 const VarDecl *ThreadIDVar; 143 StringRef HelperName; 144 }; 145 146 /// API for captured statement code generation in OpenMP constructs. 
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 148 public: 149 class UntiedTaskActionTy final : public PrePostActionTy { 150 bool Untied; 151 const VarDecl *PartIDVar; 152 const RegionCodeGenTy UntiedCodeGen; 153 llvm::SwitchInst *UntiedSwitch = nullptr; 154 155 public: 156 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 157 const RegionCodeGenTy &UntiedCodeGen) 158 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 159 void Enter(CodeGenFunction &CGF) override { 160 if (Untied) { 161 // Emit task switching point. 162 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 llvm::Value *Res = 166 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 167 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 168 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 169 CGF.EmitBlock(DoneBB); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 173 CGF.Builder.GetInsertBlock()); 174 emitUntiedSwitch(CGF); 175 } 176 } 177 void emitUntiedSwitch(CodeGenFunction &CGF) const { 178 if (Untied) { 179 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 180 CGF.GetAddrOfLocalVar(PartIDVar), 181 PartIDVar->getType()->castAs<PointerType>()); 182 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 183 PartIdLVal); 184 UntiedCodeGen(CGF); 185 CodeGenFunction::JumpDest CurPoint = 186 CGF.getJumpDestInCurrentScope(".untied.next."); 187 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 188 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 189 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 190 CGF.Builder.GetInsertBlock()); 191 CGF.EmitBranchThroughCleanup(CurPoint); 192 CGF.EmitBlock(CurPoint.getBlock()); 193 } 194 } 195 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 196 }; 197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 198 const VarDecl *ThreadIDVar, 199 const RegionCodeGenTy &CodeGen, 200 OpenMPDirectiveKind Kind, bool HasCancel, 201 const UntiedTaskActionTy &Action) 202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 203 ThreadIDVar(ThreadIDVar), Action(Action) { 204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 205 } 206 207 /// Get a variable or parameter for storing global thread id 208 /// inside OpenMP construct. 209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 210 211 /// Get an LValue for the current ThreadID variable. 212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 213 214 /// Get the name of the capture helper. 215 StringRef getHelperName() const override { return ".omp_outlined."; } 216 217 void emitUntiedSwitch(CodeGenFunction &CGF) override { 218 Action.emitUntiedSwitch(CGF); 219 } 220 221 static bool classof(const CGCapturedStmtInfo *Info) { 222 return CGOpenMPRegionInfo::classof(Info) && 223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 224 TaskOutlinedRegion; 225 } 226 227 private: 228 /// A variable or parameter storing global thread id for OpenMP 229 /// constructs. 230 const VarDecl *ThreadIDVar; 231 /// Action for emitting code for untied tasks. 232 const UntiedTaskActionTy &Action; 233 }; 234 235 /// API for inlined captured statement code generation in OpenMP 236 /// constructs. 237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 238 public: 239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 240 const RegionCodeGenTy &CodeGen, 241 OpenMPDirectiveKind Kind, bool HasCancel) 242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 243 OldCSI(OldCSI), 244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 245 246 // Retrieve the value of the context parameter. 
247 llvm::Value *getContextValue() const override { 248 if (OuterRegionInfo) 249 return OuterRegionInfo->getContextValue(); 250 llvm_unreachable("No context value for inlined OpenMP region"); 251 } 252 253 void setContextValue(llvm::Value *V) override { 254 if (OuterRegionInfo) { 255 OuterRegionInfo->setContextValue(V); 256 return; 257 } 258 llvm_unreachable("No context value for inlined OpenMP region"); 259 } 260 261 /// Lookup the captured field decl for a variable. 262 const FieldDecl *lookup(const VarDecl *VD) const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->lookup(VD); 265 // If there is no outer outlined region,no need to lookup in a list of 266 // captured variables, we can use the original one. 267 return nullptr; 268 } 269 270 FieldDecl *getThisFieldDecl() const override { 271 if (OuterRegionInfo) 272 return OuterRegionInfo->getThisFieldDecl(); 273 return nullptr; 274 } 275 276 /// Get a variable or parameter for storing global thread id 277 /// inside OpenMP construct. 278 const VarDecl *getThreadIDVariable() const override { 279 if (OuterRegionInfo) 280 return OuterRegionInfo->getThreadIDVariable(); 281 return nullptr; 282 } 283 284 /// Get an LValue for the current ThreadID variable. 285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 286 if (OuterRegionInfo) 287 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 288 llvm_unreachable("No LValue for inlined OpenMP construct"); 289 } 290 291 /// Get the name of the capture helper. 
292 StringRef getHelperName() const override { 293 if (auto *OuterRegionInfo = getOldCSI()) 294 return OuterRegionInfo->getHelperName(); 295 llvm_unreachable("No helper name for inlined OpenMP construct"); 296 } 297 298 void emitUntiedSwitch(CodeGenFunction &CGF) override { 299 if (OuterRegionInfo) 300 OuterRegionInfo->emitUntiedSwitch(CGF); 301 } 302 303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 304 305 static bool classof(const CGCapturedStmtInfo *Info) { 306 return CGOpenMPRegionInfo::classof(Info) && 307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 308 } 309 310 ~CGOpenMPInlinedRegionInfo() override = default; 311 312 private: 313 /// CodeGen info about outer OpenMP region. 314 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 315 CGOpenMPRegionInfo *OuterRegionInfo; 316 }; 317 318 /// API for captured statement code generation in OpenMP target 319 /// constructs. For this captures, implicit parameters are used instead of the 320 /// captured fields. The name of the target region has to be unique in a given 321 /// application so it is provided by the client, because only the client has 322 /// the information to generate that. 323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 324 public: 325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 326 const RegionCodeGenTy &CodeGen, StringRef HelperName) 327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 328 /*HasCancel=*/false), 329 HelperName(HelperName) {} 330 331 /// This is unused for target regions because each starts executing 332 /// with a single thread. 333 const VarDecl *getThreadIDVariable() const override { return nullptr; } 334 335 /// Get the name of the capture helper. 
336 StringRef getHelperName() const override { return HelperName; } 337 338 static bool classof(const CGCapturedStmtInfo *Info) { 339 return CGOpenMPRegionInfo::classof(Info) && 340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 341 } 342 343 private: 344 StringRef HelperName; 345 }; 346 347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 348 llvm_unreachable("No codegen for expressions"); 349 } 350 /// API for generation of expressions captured in a innermost OpenMP 351 /// region. 352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 353 public: 354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 356 OMPD_unknown, 357 /*HasCancel=*/false), 358 PrivScope(CGF) { 359 // Make sure the globals captured in the provided statement are local by 360 // using the privatization logic. We assume the same variable is not 361 // captured more than once. 362 for (const auto &C : CS.captures()) { 363 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 364 continue; 365 366 const VarDecl *VD = C.getCapturedVar(); 367 if (VD->isLocalVarDeclOrParm()) 368 continue; 369 370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 371 /*RefersToEnclosingVariableOrCapture=*/false, 372 VD->getType().getNonReferenceType(), VK_LValue, 373 C.getLocation()); 374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); 375 } 376 (void)PrivScope.Privatize(); 377 } 378 379 /// Lookup the captured field decl for a variable. 380 const FieldDecl *lookup(const VarDecl *VD) const override { 381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 382 return FD; 383 return nullptr; 384 } 385 386 /// Emit the captured statement body. 
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 388 llvm_unreachable("No body for expressions"); 389 } 390 391 /// Get a variable or parameter for storing global thread id 392 /// inside OpenMP construct. 393 const VarDecl *getThreadIDVariable() const override { 394 llvm_unreachable("No thread id for expressions"); 395 } 396 397 /// Get the name of the capture helper. 398 StringRef getHelperName() const override { 399 llvm_unreachable("No helper name for expressions"); 400 } 401 402 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 403 404 private: 405 /// Private scope to capture global variables. 406 CodeGenFunction::OMPPrivateScope PrivScope; 407 }; 408 409 /// RAII for emitting code of OpenMP constructs. 410 class InlinedOpenMPRegionRAII { 411 CodeGenFunction &CGF; 412 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 413 FieldDecl *LambdaThisCaptureField = nullptr; 414 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 415 bool NoInheritance = false; 416 417 public: 418 /// Constructs region for combined constructs. 419 /// \param CodeGen Code generation sequence for combined directives. Includes 420 /// a list of functions used for code generation of implicitly inlined 421 /// regions. 422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 423 OpenMPDirectiveKind Kind, bool HasCancel, 424 bool NoInheritance = true) 425 : CGF(CGF), NoInheritance(NoInheritance) { 426 // Start emission for the construct. 427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 429 if (NoInheritance) { 430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 431 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 432 CGF.LambdaThisCaptureField = nullptr; 433 BlockInfo = CGF.BlockInfo; 434 CGF.BlockInfo = nullptr; 435 } 436 } 437 438 ~InlinedOpenMPRegionRAII() { 439 // Restore original CapturedStmtInfo only if we're done with code emission. 
440 auto *OldCSI = 441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 442 delete CGF.CapturedStmtInfo; 443 CGF.CapturedStmtInfo = OldCSI; 444 if (NoInheritance) { 445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 446 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 447 CGF.BlockInfo = BlockInfo; 448 } 449 } 450 }; 451 452 /// Values for bit flags used in the ident_t to describe the fields. 453 /// All enumeric elements are named and described in accordance with the code 454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 455 enum OpenMPLocationFlags : unsigned { 456 /// Use trampoline for internal microtask. 457 OMP_IDENT_IMD = 0x01, 458 /// Use c-style ident structure. 459 OMP_IDENT_KMPC = 0x02, 460 /// Atomic reduction option for kmpc_reduce. 461 OMP_ATOMIC_REDUCE = 0x10, 462 /// Explicit 'barrier' directive. 463 OMP_IDENT_BARRIER_EXPL = 0x20, 464 /// Implicit barrier in code. 465 OMP_IDENT_BARRIER_IMPL = 0x40, 466 /// Implicit barrier in 'for' directive. 467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 468 /// Implicit barrier in 'sections' directive. 469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 470 /// Implicit barrier in 'single' directive. 471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 472 /// Call of __kmp_for_static_init for static loop. 473 OMP_IDENT_WORK_LOOP = 0x200, 474 /// Call of __kmp_for_static_init for sections. 475 OMP_IDENT_WORK_SECTIONS = 0x400, 476 /// Call of __kmp_for_static_init for distribute. 477 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 479 }; 480 481 namespace { 482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 483 /// Values for bit flags for marking which requires clauses have been used. 484 enum OpenMPOffloadingRequiresDirFlags : int64_t { 485 /// flag undefined. 486 OMP_REQ_UNDEFINED = 0x000, 487 /// no requires clause present. 488 OMP_REQ_NONE = 0x001, 489 /// reverse_offload clause. 
490 OMP_REQ_REVERSE_OFFLOAD = 0x002, 491 /// unified_address clause. 492 OMP_REQ_UNIFIED_ADDRESS = 0x004, 493 /// unified_shared_memory clause. 494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 495 /// dynamic_allocators clause. 496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 498 }; 499 500 enum OpenMPOffloadingReservedDeviceIDs { 501 /// Device ID if the device was not defined, runtime should get it 502 /// from environment variables in the spec. 503 OMP_DEVICEID_UNDEF = -1, 504 }; 505 } // anonymous namespace 506 507 /// Describes ident structure that describes a source location. 508 /// All descriptions are taken from 509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 510 /// Original structure: 511 /// typedef struct ident { 512 /// kmp_int32 reserved_1; /**< might be used in Fortran; 513 /// see above */ 514 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 515 /// KMP_IDENT_KMPC identifies this union 516 /// member */ 517 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 518 /// see above */ 519 ///#if USE_ITT_BUILD 520 /// /* but currently used for storing 521 /// region-specific ITT */ 522 /// /* contextual information. */ 523 ///#endif /* USE_ITT_BUILD */ 524 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 525 /// C++ */ 526 /// char const *psource; /**< String describing the source location. 527 /// The string is composed of semi-colon separated 528 // fields which describe the source file, 529 /// the function and a pair of line numbers that 530 /// delimit the construct. 531 /// */ 532 /// } ident_t; 533 enum IdentFieldIndex { 534 /// might be used in Fortran 535 IdentField_Reserved_1, 536 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
537 IdentField_Flags, 538 /// Not really used in Fortran any more 539 IdentField_Reserved_2, 540 /// Source[4] in Fortran, do not use for C++ 541 IdentField_Reserved_3, 542 /// String describing the source location. The string is composed of 543 /// semi-colon separated fields which describe the source file, the function 544 /// and a pair of line numbers that delimit the construct. 545 IdentField_PSource 546 }; 547 548 /// Schedule types for 'omp for' loops (these enumerators are taken from 549 /// the enum sched_type in kmp.h). 550 enum OpenMPSchedType { 551 /// Lower bound for default (unordered) versions. 552 OMP_sch_lower = 32, 553 OMP_sch_static_chunked = 33, 554 OMP_sch_static = 34, 555 OMP_sch_dynamic_chunked = 35, 556 OMP_sch_guided_chunked = 36, 557 OMP_sch_runtime = 37, 558 OMP_sch_auto = 38, 559 /// static with chunk adjustment (e.g., simd) 560 OMP_sch_static_balanced_chunked = 45, 561 /// Lower bound for 'ordered' versions. 562 OMP_ord_lower = 64, 563 OMP_ord_static_chunked = 65, 564 OMP_ord_static = 66, 565 OMP_ord_dynamic_chunked = 67, 566 OMP_ord_guided_chunked = 68, 567 OMP_ord_runtime = 69, 568 OMP_ord_auto = 70, 569 OMP_sch_default = OMP_sch_static, 570 /// dist_schedule types 571 OMP_dist_sch_static_chunked = 91, 572 OMP_dist_sch_static = 92, 573 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 574 /// Set if the monotonic schedule modifier was present. 575 OMP_sch_modifier_monotonic = (1 << 29), 576 /// Set if the nonmonotonic schedule modifier was present. 577 OMP_sch_modifier_nonmonotonic = (1 << 30), 578 }; 579 580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 581 /// region. 
582 class CleanupTy final : public EHScopeStack::Cleanup { 583 PrePostActionTy *Action; 584 585 public: 586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 588 if (!CGF.HaveInsertPoint()) 589 return; 590 Action->Exit(CGF); 591 } 592 }; 593 594 } // anonymous namespace 595 596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 597 CodeGenFunction::RunCleanupsScope Scope(CGF); 598 if (PrePostAction) { 599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 600 Callback(CodeGen, CGF, *PrePostAction); 601 } else { 602 PrePostActionTy Action; 603 Callback(CodeGen, CGF, Action); 604 } 605 } 606 607 /// Check if the combiner is a call to UDR combiner and if it is so return the 608 /// UDR decl used for reduction. 609 static const OMPDeclareReductionDecl * 610 getReductionInit(const Expr *ReductionOp) { 611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 613 if (const auto *DRE = 614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 616 return DRD; 617 return nullptr; 618 } 619 620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 621 const OMPDeclareReductionDecl *DRD, 622 const Expr *InitOp, 623 Address Private, Address Original, 624 QualType Ty) { 625 if (DRD->getInitializer()) { 626 std::pair<llvm::Function *, llvm::Function *> Reduction = 627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 628 const auto *CE = cast<CallExpr>(InitOp); 629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 632 const auto *LHSDRE = 633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 634 const auto *RHSDRE = 635 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); 639 (void)PrivateScope.Privatize(); 640 RValue Func = RValue::get(Reduction.second); 641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 642 CGF.EmitIgnoredExpr(InitOp); 643 } else { 644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 646 auto *GV = new llvm::GlobalVariable( 647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 648 llvm::GlobalValue::PrivateLinkage, Init, Name); 649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 650 RValue InitRVal; 651 switch (CGF.getEvaluationKind(Ty)) { 652 case TEK_Scalar: 653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 654 break; 655 case TEK_Complex: 656 InitRVal = 657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 658 break; 659 case TEK_Aggregate: { 660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 663 /*IsInitializer=*/false); 664 return; 665 } 666 } 667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 670 /*IsInitializer=*/false); 671 } 672 } 673 674 /// Emit initialization of arrays of complex types. 675 /// \param DestAddr Address of the array. 676 /// \param Type Type of array. 677 /// \param Init Initial expression of array. 678 /// \param SrcAddr Address of the original array. 
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 if (DRD) 691 SrcAddr = 692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 693 694 llvm::Value *SrcBegin = nullptr; 695 if (DRD) 696 SrcBegin = SrcAddr.getPointer(); 697 llvm::Value *DestBegin = DestAddr.getPointer(); 698 // Cast from pointer to array type to pointer to single element. 699 llvm::Value *DestEnd = 700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, SrcAddr.getElementType(), 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, DestAddr.getElementType(), 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 746 "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 753 "omp.arraycpy.dest.element"); 754 // Check whether we've reached the end. 
755 llvm::Value *Done = 756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 759 760 // Done. 761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 762 } 763 764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 765 return CGF.EmitOMPSharedLValue(E); 766 } 767 768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 769 const Expr *E) { 770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 772 return LValue(); 773 } 774 775 void ReductionCodeGen::emitAggregateInitialization( 776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 777 const OMPDeclareReductionDecl *DRD) { 778 // Emit VarDecl with copy init for arrays. 779 // Get the address of the original variable captured in current 780 // captured region. 781 const auto *PrivateVD = 782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 783 bool EmitDeclareReductionInit = 784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 786 EmitDeclareReductionInit, 787 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 788 : PrivateVD->getInit(), 789 DRD, SharedAddr); 790 } 791 792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 793 ArrayRef<const Expr *> Origs, 794 ArrayRef<const Expr *> Privates, 795 ArrayRef<const Expr *> ReductionOps) { 796 ClausesData.reserve(Shareds.size()); 797 SharedAddresses.reserve(Shareds.size()); 798 Sizes.reserve(Shareds.size()); 799 BaseDecls.reserve(Shareds.size()); 800 const auto *IOrig = Origs.begin(); 801 const auto *IPriv = Privates.begin(); 802 const auto *IRed = ReductionOps.begin(); 803 for (const Expr *Ref : Shareds) { 804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 805 std::advance(IOrig, 1); 806 std::advance(IPriv, 1); 807 std::advance(IRed, 1); 808 } 809 } 810 811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 813 "Number of generated lvalues must be exactly N."); 814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 816 SharedAddresses.emplace_back(First, Second); 817 if (ClausesData[N].Shared == ClausesData[N].Ref) { 818 OrigAddresses.emplace_back(First, Second); 819 } else { 820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 822 OrigAddresses.emplace_back(First, Second); 823 } 824 } 825 826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 827 const auto *PrivateVD = 828 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 829 QualType PrivateType = PrivateVD->getType(); 830 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 831 if (!PrivateType->isVariablyModifiedType()) { 832 Sizes.emplace_back( 833 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 834 nullptr); 835 return; 836 } 837 llvm::Value *Size; 838 llvm::Value *SizeInChars; 839 auto 
*ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (upper-bound ptr - first ptr) + 1,
    // byte size = element count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: byte size comes from the type, element count is
    // byte size / sizeof(element).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the opaque size expression of the
  // private VLA type while that type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the private type of the N-th reduction item using a size that was
/// computed elsewhere.
///
/// \param Size Element count to bind to the size expression of the private
/// variable-array type. Must be null when the private type is not variably
/// modified (asserted, together with a null element count in Sizes[N]); in
/// that case nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of the N-th reduction item.
///
/// \param PrivateAddr Address of the private copy; it is element-bitcast to
/// the memory type of the private variable before use.
/// \param SharedAddr Address of the shared item, forwarded to the
/// initializer helpers.
/// \param DefaultInit Callback that performs default initialization. Its
/// boolean result is only consulted on the last path below.
///
/// Three paths, chosen in this order:
///  - array-typed private: aggregate initialization (DefaultInit is run first
///    when the user-defined reduction has an initializer);
///  - user-defined reduction that has an initializer, or whose private
///    variable has no initializer: DefaultInit, then the reduction
///    initializer;
///  - otherwise: if DefaultInit returns false and the private variable has a
///    non-trivial initializer, that initializer is emitted into the private
///    copy.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl
*DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed private copy: initialize as an aggregate. The result of
    // DefaultInit is deliberately ignored here.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar/record with a user-defined reduction: run the default init, then
    // the reduction's initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the private copy of the N-th reduction item has a type
/// that requires destruction (its destruction kind is not DK_none).
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of the N-th reduction item.
///
/// Does nothing unless needCleanups(N) is true. \p PrivateAddr is
/// element-bitcast to the memory type of the private variable before the
/// cleanup is pushed.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Load through the pointer/reference levels of \p BaseLV until the element
/// type is reached.
///
/// Starting from \p BaseTy (with a top-level reference stripped), one level
/// of pointer or reference is loaded per iteration until the type is neither
/// a pointer nor a reference, or equals \p ElTy.
///
/// \return An lvalue at the final address, element-bitcast to the memory
/// type of \p ElTy, keeping the type, base info and TBAA info of the last
/// loaded lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while
((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Peel one level of indirection: load the pointer, or bind through the
    // reference.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr in as many levels of indirection as separate \p BaseTy from
/// \p ElTy.
///
/// For every pointer/reference level between \p BaseTy (top-level reference
/// stripped) and \p ElTy a memory temporary is created, and each temporary is
/// stored into the one created before it. \p Addr, cast to the element type
/// of the innermost temporary, is stored into that innermost temporary.
///
/// \return The outermost temporary when at least one level exists; otherwise
/// \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // most recently created temporary
  Address TopTmp = Address::invalid();     // temporary created one step earlier
  Address MostTopTmp = Address::invalid(); // first (outermost) temporary
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}

/// Find the variable underlying an array section or array subscript.
///
/// For an OMPArraySectionExpr, nested sections and then nested subscripts are
/// stripped from the base; for an ArraySubscriptExpr, nested subscripts are
/// stripped. The remaining base must be a DeclRefExpr naming a VarDecl.
///
/// \param [out] DE Set to the base DeclRefExpr; left untouched when \p Ref is
/// neither an array section nor an array subscript.
/// \return The base variable, or nullptr when \p Ref is neither kind.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base =
TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust the private address of the N-th reduction item so that it can stand
/// in for the item's base variable, and record that base variable.
///
/// When the item is an array section or array subscript, the pointer
/// difference between the beginning of the base variable and the shared item
/// is applied to the private pointer, and the result is wrapped in the
/// indirection levels of the base variable's type (see castToBase). For any
/// other item the referenced variable is recorded and \p PrivateAddr is
/// returned unchanged.
///
/// Appends exactly one entry to BaseDecls on every call.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned (and only read) when getBaseDecl returns non-null.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Base pointer minus shared item pointer, in units of the shared element
    // type.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Offset the private pointer by the same distance.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// \return true if the reduction operation of the N-th item resolves to a
/// user-defined reduction declaration that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Get an lvalue for the thread id by loading through the thread-id
/// variable, which is expected to have pointer type here.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the body of the region through the stored CodeGen callback.
///
/// Does nothing when \p CGF has no insert point. The body is emitted inside a
/// terminate scope; when \p S is non-null its profile counter is incremented
/// first.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Get an lvalue for the thread id of a task region: the address of the
/// thread-id variable itself, with the variable's own type (no load through
/// a pointer).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable field of type \p FieldTy without a
/// bit width or in-class initializer, add it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Set up the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator emitted before the first part of a name
/// built by getName().
/// \param Separator Separator emitted before every subsequent part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Drop cached internal variables and erase unused global declarations that
/// were recorded in EmittedNonTargetVariables.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    // Skip handles whose value is no longer alive.
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase pure declarations that nothing uses.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Build a name from \p Parts: the first part is preceded by FirstSeparator,
/// every following part by Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner or initializer function of a user-defined
/// reduction.
///
/// \param Ty Reduction type; both parameters are restrict-qualified pointers
/// to it.
/// \param CombinerInitializer Expression to evaluate in the function body;
/// may be null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the "in" parameter.
/// \param Out Variable mapped to the pointee of the "out" parameter.
/// \param IsCombiner Selects the function name, and for initializers enables
/// emission of \p Out's own non-trivial initializer.
/// \return The emitted function, created with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the "out" parameter is pushed first, see Args below)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
"omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // In optimized builds, replace noinline/optnone with alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // Initializer functions first emit Out's own non-trivial initializer into
  // the storage Out is now mapped to.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The combiner/initializer expression is evaluated for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner (and, if present, initializer) function for the
/// user-defined reduction \p D and cache them in UDRMap.
///
/// Does nothing if \p D already has an entry in UDRMap. An initializer
/// function is emitted only when \p D has an initializer; the initializer
/// expression itself is passed on only for the CallInit kind. When \p CGF is
/// non-null, \p D is also recorded against CGF->CurFn in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For initializer kinds other than CallInit no expression is passed; the
    // helper then only emits the private variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which function the reduction was emitted for.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Get the (combiner, initializer) function pair for the user-defined
/// reduction \p D, emitting both on first use.
///
/// The initializer entry is null when \p D has no initializer.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback onto \p OMPBuilder (when non-null); the
  // destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
// The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      // IP must be at the end of its block.
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit the branch at IP, then restore the builder's previous position.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the callback pushed by the constructor, if a builder was given.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed to; null disables both push and pop.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Emit the outlined function for a 'parallel' or 'teams' region.
///
/// \param D Directive being lowered; used to query cancellation and for the
/// begin location.
/// \param CS Captured statement to outline.
/// \param ThreadIDVar Thread-id variable; must have pointer type (asserted).
/// \param InnermostKind Directive kind passed to the region info and to the
/// OpenMPIRBuilder finalization entry.
/// \param OutlinedHelperName Helper name passed to the region info.
/// \param CodeGen Callback that emits the region body.
///
/// HasCancel is taken from \p D when it is one of the parallel-based
/// directive classes checked below, and is false otherwise.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // PSR pops the finalization callback again when this function returns.
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Emit the outlined function for the 'parallel' captured region of \p D.
///
/// Forwards to emitParallelOrTeamsOutlinedFunction with the OMPD_parallel
/// captured statement and the runtime's outlined helper name.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for the 'teams' captured region of \p D.
///
/// Forwards to emitParallelOrTeamsOutlinedFunction with the OMPD_teams
/// captured statement and the runtime's outlined helper name.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for a 'task' or taskloop-based region.
///
/// \param ThreadIDVar Thread-id variable; must NOT have pointer type
/// (asserted).
/// \param PartIDVar Variable passed to the untied-task action.
/// \param TaskTVar Pointer-typed variable whose loaded value is passed as the
/// task argument of the __kmpc_omp_task call emitted by the untied action.
/// \param Tied Whether the task is tied.
/// \param [out] NumberOfParts Written only when \p Tied is false; receives
/// the number of parts reported by the untied-task action.
/// \return The outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits
  // __kmpc_omp_task(loc, thread id, task pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, everything else
  // under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // HasCancel stays false for directive classes not listed here.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the constants in \p Data to \p Fields following the LLVM layout of
/// record \p RD.
///
/// Fields of \p RD are visited in declaration order and each consumes the
/// next element of \p Data, so \p Data must provide one constant per field.
/// LLVM struct elements that lie between two consecutive fields are filled
/// with null values of the element's type.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD,
const CGRecordLayout &RL, 1345 ArrayRef<llvm::Constant *> Data) { 1346 llvm::StructType *StructTy = RL.getLLVMType(); 1347 unsigned PrevIdx = 0; 1348 ConstantInitBuilder CIBuilder(CGM); 1349 const auto *DI = Data.begin(); 1350 for (const FieldDecl *FD : RD->fields()) { 1351 unsigned Idx = RL.getLLVMFieldNo(FD); 1352 // Fill the alignment. 1353 for (unsigned I = PrevIdx; I < Idx; ++I) 1354 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1355 PrevIdx = Idx + 1; 1356 Fields.add(*DI); 1357 ++DI; 1358 } 1359 } 1360 1361 template <class... As> 1362 static llvm::GlobalVariable * 1363 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1364 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1365 As &&... Args) { 1366 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1367 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1368 ConstantInitBuilder CIBuilder(CGM); 1369 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1370 buildStructValue(Fields, CGM, RD, RL, Data); 1371 return Fields.finishAndCreateGlobal( 1372 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1373 std::forward<As>(Args)...); 1374 } 1375 1376 template <typename T> 1377 static void 1378 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1379 ArrayRef<llvm::Constant *> Data, 1380 T &Parent) { 1381 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1382 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1383 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1384 buildStructValue(Fields, CGM, RD, RL, Data); 1385 Fields.finishAndAddTo(Parent); 1386 } 1387 1388 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1389 bool AtCurrentPoint) { 1390 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1391 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1392 1393 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Placeholder goes into the block the builder is currently inserting
    // into.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Placeholder goes right after the alloca insert point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insert point of the current function, if one exists.
///
/// The map entry is reset to null before the placeholder instruction is
/// erased from its parent.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format the source location \p Loc as an ident string of the form
/// ";<file>;<function>;<line>;<column>;;".
///
/// The function part is the qualified name of CGF.CurFuncDecl when that is a
/// FunctionDecl and is empty otherwise. File, line and column come from the
/// presumed location of \p Loc.
///
/// \param Buffer Storage the string is written to.
/// \return The string obtained from the stream writing to \p Buffer.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Get the ident_t value describing \p Loc for use in runtime calls.
///
/// The default source-location string is used when debug info is disabled or
/// \p Loc is invalid; otherwise the string is built from the current
/// function's qualified name and the presumed file, line and column.
///
/// \param Flags Converted to llvm::omp::IdentFlag and passed to the ident.
/// \return The ident obtained from the OpenMPIRBuilder.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    // Stays empty when the current declaration is not a function.
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column =
PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

/// Get the global thread id value for the current function.
///
/// Sources, in order of preference:
///  1. With the OpenMPIRBuilder language option enabled, the builder's own
///     thread-id query (nothing below is used).
///  2. A value already cached for CGF.CurFn in OpenMPLocThreadIDMap.
///  3. Inside an OpenMP region that has a thread-id variable, a load of that
///     variable (subject to the exception-related conditions checked in the
///     body); the load is cached only if it was emitted in the entry block.
///  4. A call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is always cached.
///
/// \param Loc Source location used for the ident argument and the load.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load the thread id from the variable when any of these holds:
      //  - no landing pad is required, or exceptions / C++ exceptions are
      //    disabled;
      //  - we are currently emitting into the entry block;
      //  - the lvalue's pointer is not an instruction, or it is an
      //    instruction located in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1505 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1506 if (!Elem.second.ServiceInsertPt) 1507 setLocThreadIdInsertPt(CGF); 1508 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1509 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1510 llvm::CallInst *Call = CGF.Builder.CreateCall( 1511 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1512 OMPRTL___kmpc_global_thread_num), 1513 emitUpdateLocation(CGF, Loc)); 1514 Call->setCallingConv(CGF.getRuntimeCC()); 1515 Elem.second.ThreadID = Call; 1516 return Call; 1517 } 1518 1519 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1520 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1521 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1522 clearLocThreadIdInsertPt(CGF); 1523 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1524 } 1525 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1526 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1527 UDRMap.erase(D); 1528 FunctionUDRMap.erase(CGF.CurFn); 1529 } 1530 auto I = FunctionUDMMap.find(CGF.CurFn); 1531 if (I != FunctionUDMMap.end()) { 1532 for(const auto *D : I->second) 1533 UDMMap.erase(D); 1534 FunctionUDMMap.erase(I); 1535 } 1536 LastprivateConditionalToTypes.erase(CGF.CurFn); 1537 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1538 } 1539 1540 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1541 return OMPBuilder.IdentPtr; 1542 } 1543 1544 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1545 if (!Kmpc_MicroTy) { 1546 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    // Variadic: the two i32* parameters are followed by "...".
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Get the runtime function used for static loop initialization.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64
/// (asserted).
/// \param IVSigned Whether the iteration variable is signed; unsigned
/// variants carry a "u" suffix.
/// \param IsGPUDistribute Selects the __kmpc_distribute_static_init_* family
/// instead of __kmpc_for_static_init_*.
/// \return Callee named __kmpc_{for,distribute}_static_init_{4,4u,8,8u},
/// returning void, with the parameter list given in TypeParams below.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  // Integer type matching the iteration variable width.
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Get the runtime function used to initialize dynamic loop dispatch.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64
/// (asserted).
/// \param IVSigned Whether the iteration variable is signed; unsigned
/// variants carry a "u" suffix.
/// \return Callee named __kmpc_dispatch_init_{4,4u,8,8u}, returning void.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty;
  // Bounds, stride and chunk are passed by value in the IV-sized integer
  // type.
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Get the runtime function used to finish dynamic loop dispatch.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64
/// (asserted). It only selects the function name.
/// \param IVSigned Whether the iteration variable is signed; unsigned
/// variants carry a "u" suffix.
/// \return Callee named __kmpc_dispatch_fini_{4,4u,8,8u}, returning void and
/// taking (loc, tid).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Get the runtime function used to fetch the next chunk of a dynamically
/// dispatched loop.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64
/// (asserted).
/// \param IVSigned Whether the iteration variable is signed; unsigned
/// variants carry a "u" suffix.
/// \return Callee named __kmpc_dispatch_next_{4,4u,8,8u}, returning i32.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1637 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1638 llvm::Type *TypeParams[] = { 1639 getIdentTyPointerTy(), // loc 1640 CGM.Int32Ty, // tid 1641 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1642 PtrTy, // p_lower 1643 PtrTy, // p_upper 1644 PtrTy // p_stride 1645 }; 1646 auto *FnTy = 1647 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1648 return CGM.CreateRuntimeFunction(FnTy, Name); 1649 } 1650 1651 /// Obtain information that uniquely identifies a target entry. This 1652 /// consists of the file and device IDs as well as line number associated with 1653 /// the relevant entry source location. 1654 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1655 unsigned &DeviceID, unsigned &FileID, 1656 unsigned &LineNum) { 1657 SourceManager &SM = C.getSourceManager(); 1658 1659 // The loc should be always valid and have a file ID (the user cannot use 1660 // #pragma directives in macros) 1661 1662 assert(Loc.isValid() && "Source location is expected to be always valid."); 1663 1664 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1666 1667 llvm::sys::fs::UniqueID ID; 1668 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1669 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1670 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1671 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1672 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1673 << PLoc.getFilename() << EC.message(); 1674 } 1675 1676 DeviceID = ID.getDevice(); 1677 FileID = ID.getFile(); 1678 LineNum = PLoc.getLine(); 1679 } 1680 1681 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1682 if (CGM.getLangOpts().OpenMPSimd) 1683 return Address::invalid(); 1684 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1685 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1686 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1687 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1688 HasRequiresUnifiedSharedMemory))) { 1689 SmallString<64> PtrName; 1690 { 1691 llvm::raw_svector_ostream OS(PtrName); 1692 OS << CGM.getMangledName(GlobalDecl(VD)); 1693 if (!VD->isExternallyVisible()) { 1694 unsigned DeviceID, FileID, Line; 1695 getTargetEntryUniqueInfo(CGM.getContext(), 1696 VD->getCanonicalDecl()->getBeginLoc(), 1697 DeviceID, FileID, Line); 1698 OS << llvm::format("_%x", FileID); 1699 } 1700 OS << "_decl_tgt_ref_ptr"; 1701 } 1702 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1703 if (!Ptr) { 1704 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1705 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1706 PtrName); 1707 1708 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1709 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1710 1711 if (!CGM.getLangOpts().OpenMPIsDevice) 1712 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1713 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1714 } 1715 return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD)); 1716 } 1717 return Address::invalid(); 1718 } 1719 1720 llvm::Constant * 1721 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1722 assert(!CGM.getLangOpts().OpenMPUseTLS || 1723 !CGM.getContext().getTargetInfo().isTLSSupported()); 1724 // Lookup the entry, lazily creating it if necessary. 
1725 std::string Suffix = getName({"cache", ""}); 1726 return getOrCreateInternalVariable( 1727 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1728 } 1729 1730 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1731 const VarDecl *VD, 1732 Address VDAddr, 1733 SourceLocation Loc) { 1734 if (CGM.getLangOpts().OpenMPUseTLS && 1735 CGM.getContext().getTargetInfo().isTLSSupported()) 1736 return VDAddr; 1737 1738 llvm::Type *VarTy = VDAddr.getElementType(); 1739 llvm::Value *Args[] = { 1740 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1741 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1742 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1743 getOrCreateThreadPrivateCache(VD)}; 1744 return Address::deprecated( 1745 CGF.EmitRuntimeCall( 1746 OMPBuilder.getOrCreateRuntimeFunction( 1747 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1748 Args), 1749 VDAddr.getAlignment()); 1750 } 1751 1752 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1753 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1754 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1755 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1756 // library. 1757 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1758 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1759 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1760 OMPLoc); 1761 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1762 // to register constructor/destructor for variable. 
1763 llvm::Value *Args[] = { 1764 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1765 Ctor, CopyCtor, Dtor}; 1766 CGF.EmitRuntimeCall( 1767 OMPBuilder.getOrCreateRuntimeFunction( 1768 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1769 Args); 1770 } 1771 1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1773 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1774 bool PerformInit, CodeGenFunction *CGF) { 1775 if (CGM.getLangOpts().OpenMPUseTLS && 1776 CGM.getContext().getTargetInfo().isTLSSupported()) 1777 return nullptr; 1778 1779 VD = VD->getDefinition(CGM.getContext()); 1780 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1781 QualType ASTTy = VD->getType(); 1782 1783 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1784 const Expr *Init = VD->getAnyInitializer(); 1785 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1786 // Generate function that re-emits the declaration's initializer into the 1787 // threadprivate copy of the variable VD 1788 CodeGenFunction CtorCGF(CGM); 1789 FunctionArgList Args; 1790 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1791 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1792 ImplicitParamDecl::Other); 1793 Args.push_back(&Dst); 1794 1795 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1796 CGM.getContext().VoidPtrTy, Args); 1797 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1798 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1799 llvm::Function *Fn = 1800 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1801 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1802 Args, Loc, Loc); 1803 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1804 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1805 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1806 Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment()); 1807 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1808 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1809 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1810 /*IsInitializer=*/true); 1811 ArgVal = CtorCGF.EmitLoadOfScalar( 1812 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1813 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1814 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1815 CtorCGF.FinishFunction(); 1816 Ctor = Fn; 1817 } 1818 if (VD->getType().isDestructedType() != QualType::DK_none) { 1819 // Generate function that emits destructor call for the threadprivate copy 1820 // of the variable VD 1821 CodeGenFunction DtorCGF(CGM); 1822 FunctionArgList Args; 1823 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1824 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1825 ImplicitParamDecl::Other); 1826 Args.push_back(&Dst); 1827 1828 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1829 CGM.getContext().VoidTy, Args); 1830 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1831 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1832 llvm::Function *Fn = 1833 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1834 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1835 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1836 Loc, Loc); 1837 // Create a scope with an artificial location for the body of this function. 1838 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1839 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1840 DtorCGF.GetAddrOfLocalVar(&Dst), 1841 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1842 DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()), 1843 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1844 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1845 DtorCGF.FinishFunction(); 1846 Dtor = Fn; 1847 } 1848 // Do not emit init function if it is not required. 
1849 if (!Ctor && !Dtor) 1850 return nullptr; 1851 1852 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1853 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1854 /*isVarArg=*/false) 1855 ->getPointerTo(); 1856 // Copying constructor for the threadprivate variable. 1857 // Must be NULL - reserved by runtime, but currently it requires that this 1858 // parameter is always NULL. Otherwise it fires assertion. 1859 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1860 if (Ctor == nullptr) { 1861 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Ctor = llvm::Constant::getNullValue(CtorTy); 1865 } 1866 if (Dtor == nullptr) { 1867 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1868 /*isVarArg=*/false) 1869 ->getPointerTo(); 1870 Dtor = llvm::Constant::getNullValue(DtorTy); 1871 } 1872 if (!CGF) { 1873 auto *InitFunctionTy = 1874 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1875 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1876 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1877 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1878 CodeGenFunction InitCGF(CGM); 1879 FunctionArgList ArgList; 1880 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1881 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1882 Loc, Loc); 1883 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1884 InitCGF.FinishFunction(); 1885 return InitFunction; 1886 } 1887 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1888 } 1889 return nullptr; 1890 } 1891 1892 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1893 llvm::GlobalVariable *Addr, 1894 bool PerformInit) { 1895 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1896 !CGM.getLangOpts().OpenMPIsDevice) 1897 return false; 1898 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1899 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1900 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1901 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1902 HasRequiresUnifiedSharedMemory)) 1903 return CGM.getLangOpts().OpenMPIsDevice; 1904 VD = VD->getDefinition(CGM.getContext()); 1905 assert(VD && "Unknown VarDecl"); 1906 1907 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1908 return CGM.getLangOpts().OpenMPIsDevice; 1909 1910 QualType ASTTy = VD->getType(); 1911 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1912 1913 // Produce the unique prefix to identify the new target regions. We use 1914 // the source location of the variable declaration which we know to not 1915 // conflict with any target region. 1916 unsigned DeviceID; 1917 unsigned FileID; 1918 unsigned Line; 1919 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1920 SmallString<128> Buffer, Out; 1921 { 1922 llvm::raw_svector_ostream OS(Buffer); 1923 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1924 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1925 } 1926 1927 const Expr *Init = VD->getAnyInitializer(); 1928 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1929 llvm::Constant *Ctor; 1930 llvm::Constant *ID; 1931 if (CGM.getLangOpts().OpenMPIsDevice) { 1932 // Generate function that re-emits the declaration's initializer into 1933 // the threadprivate copy of the variable VD 1934 CodeGenFunction CtorCGF(CGM); 1935 1936 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1937 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1938 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1939 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1940 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1941 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1942 FunctionArgList(), Loc, Loc); 1943 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1944 llvm::Constant *AddrInAS0 = Addr; 1945 if (Addr->getAddressSpace() != 0) 1946 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1947 Addr, llvm::PointerType::getWithSamePointeeType( 1948 cast<llvm::PointerType>(Addr->getType()), 0)); 1949 CtorCGF.EmitAnyExprToMem( 1950 Init, 1951 Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)), 1952 Init->getType().getQualifiers(), 1953 /*IsInitializer=*/true); 1954 CtorCGF.FinishFunction(); 1955 Ctor = Fn; 1956 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1957 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1958 } else { 1959 Ctor = new llvm::GlobalVariable( 1960 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1961 llvm::GlobalValue::PrivateLinkage, 1962 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1963 ID = Ctor; 1964 } 1965 1966 // Register the information for the entry associated with the constructor. 1967 Out.clear(); 1968 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1969 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1970 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1971 } 1972 if (VD->getType().isDestructedType() != QualType::DK_none) { 1973 llvm::Constant *Dtor; 1974 llvm::Constant *ID; 1975 if (CGM.getLangOpts().OpenMPIsDevice) { 1976 // Generate function that emits destructor call for the threadprivate 1977 // copy of the variable VD 1978 CodeGenFunction DtorCGF(CGM); 1979 1980 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1981 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1982 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1983 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1984 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1985 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1986 FunctionArgList(), Loc, Loc); 1987 // Create a scope with an artificial location for the body of this 1988 // 
function. 1989 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1990 llvm::Constant *AddrInAS0 = Addr; 1991 if (Addr->getAddressSpace() != 0) 1992 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1993 Addr, llvm::PointerType::getWithSamePointeeType( 1994 cast<llvm::PointerType>(Addr->getType()), 0)); 1995 DtorCGF.emitDestroy( 1996 Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)), 1997 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1998 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1999 DtorCGF.FinishFunction(); 2000 Dtor = Fn; 2001 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2002 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2003 } else { 2004 Dtor = new llvm::GlobalVariable( 2005 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2006 llvm::GlobalValue::PrivateLinkage, 2007 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2008 ID = Dtor; 2009 } 2010 // Register the information for the entry associated with the destructor. 
2011 Out.clear(); 2012 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2013 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2014 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2015 } 2016 return CGM.getLangOpts().OpenMPIsDevice; 2017 } 2018 2019 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2020 QualType VarType, 2021 StringRef Name) { 2022 std::string Suffix = getName({"artificial", ""}); 2023 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2024 llvm::GlobalVariable *GAddr = 2025 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2026 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2027 CGM.getTarget().isTLSSupported()) { 2028 GAddr->setThreadLocal(/*Val=*/true); 2029 return Address(GAddr, GAddr->getValueType(), 2030 CGM.getContext().getTypeAlignInChars(VarType)); 2031 } 2032 std::string CacheSuffix = getName({"cache", ""}); 2033 llvm::Value *Args[] = { 2034 emitUpdateLocation(CGF, SourceLocation()), 2035 getThreadID(CGF, SourceLocation()), 2036 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2037 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2038 /*isSigned=*/false), 2039 getOrCreateInternalVariable( 2040 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2041 return Address( 2042 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2043 CGF.EmitRuntimeCall( 2044 OMPBuilder.getOrCreateRuntimeFunction( 2045 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2046 Args), 2047 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2048 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2049 } 2050 2051 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2052 const RegionCodeGenTy &ThenGen, 2053 const RegionCodeGenTy &ElseGen) { 2054 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2055 2056 // If the condition constant folds and can be elided, try 
to avoid emitting 2057 // the condition and the dead arm of the if/else. 2058 bool CondConstant; 2059 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2060 if (CondConstant) 2061 ThenGen(CGF); 2062 else 2063 ElseGen(CGF); 2064 return; 2065 } 2066 2067 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2068 // emit the conditional branch. 2069 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2070 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2071 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2072 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2073 2074 // Emit the 'then' code. 2075 CGF.EmitBlock(ThenBlock); 2076 ThenGen(CGF); 2077 CGF.EmitBranch(ContBlock); 2078 // Emit the 'else' code if present. 2079 // There is no need to emit line number for unconditional branch. 2080 (void)ApplyDebugLocation::CreateEmpty(CGF); 2081 CGF.EmitBlock(ElseBlock); 2082 ElseGen(CGF); 2083 // There is no need to emit line number for unconditional branch. 2084 (void)ApplyDebugLocation::CreateEmpty(CGF); 2085 CGF.EmitBranch(ContBlock); 2086 // Emit the continuation block for code after the if. 
2087 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2088 } 2089 2090 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2091 llvm::Function *OutlinedFn, 2092 ArrayRef<llvm::Value *> CapturedVars, 2093 const Expr *IfCond, 2094 llvm::Value *NumThreads) { 2095 if (!CGF.HaveInsertPoint()) 2096 return; 2097 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2098 auto &M = CGM.getModule(); 2099 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2100 this](CodeGenFunction &CGF, PrePostActionTy &) { 2101 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2102 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2103 llvm::Value *Args[] = { 2104 RTLoc, 2105 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2106 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2107 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2108 RealArgs.append(std::begin(Args), std::end(Args)); 2109 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2110 2111 llvm::FunctionCallee RTLFn = 2112 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2113 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2114 }; 2115 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2116 this](CodeGenFunction &CGF, PrePostActionTy &) { 2117 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2118 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2119 // Build calls: 2120 // __kmpc_serialized_parallel(&Loc, GTid); 2121 llvm::Value *Args[] = {RTLoc, ThreadID}; 2122 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2123 M, OMPRTL___kmpc_serialized_parallel), 2124 Args); 2125 2126 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2127 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2128 Address ZeroAddrBound = 2129 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2130 /*Name=*/".bound.zero.addr"); 2131 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2132 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2133 // ThreadId for serialized parallels is 0. 2134 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2135 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2136 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2137 2138 // Ensure we do not inline the function. This is trivially true for the ones 2139 // passed to __kmpc_fork_call but the ones called in serialized regions 2140 // could be inlined. This is not a perfect but it is closer to the invariant 2141 // we want, namely, every data environment starts with a new function. 2142 // TODO: We should pass the if condition to the runtime function and do the 2143 // handling there. Much cleaner code. 2144 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2145 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2146 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2147 2148 // __kmpc_end_serialized_parallel(&Loc, GTid); 2149 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2150 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2151 M, OMPRTL___kmpc_end_serialized_parallel), 2152 EndArgs); 2153 }; 2154 if (IfCond) { 2155 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2156 } else { 2157 RegionCodeGenTy ThenRCG(ThenGen); 2158 ThenRCG(CGF); 2159 } 2160 } 2161 2162 // If we're inside an (outlined) parallel region, use the region info's 2163 // thread-ID variable (it is passed in a first argument of the outlined function 2164 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2165 // regular serial code region, get thread ID by calling kmp_int32 2166 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2167 // return the address of that temp. 
2168 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2169 SourceLocation Loc) { 2170 if (auto *OMPRegionInfo = 2171 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2172 if (OMPRegionInfo->getThreadIDVariable()) 2173 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2174 2175 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2176 QualType Int32Ty = 2177 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2178 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2179 CGF.EmitStoreOfScalar(ThreadID, 2180 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2181 2182 return ThreadIDTemp; 2183 } 2184 2185 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2186 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2187 SmallString<256> Buffer; 2188 llvm::raw_svector_ostream Out(Buffer); 2189 Out << Name; 2190 StringRef RuntimeName = Out.str(); 2191 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2192 if (Elem.second) { 2193 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2194 "OMP internal variable has different type than requested"); 2195 return &*Elem.second; 2196 } 2197 2198 return Elem.second = new llvm::GlobalVariable( 2199 CGM.getModule(), Ty, /*IsConstant*/ false, 2200 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2201 Elem.first(), /*InsertBefore=*/nullptr, 2202 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2203 } 2204 2205 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2206 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2207 std::string Name = getName({Prefix, "var"}); 2208 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2209 } 2210 2211 namespace { 2212 /// Common pre(post)-action for different OpenMP constructs. 
2213 class CommonActionTy final : public PrePostActionTy { 2214 llvm::FunctionCallee EnterCallee; 2215 ArrayRef<llvm::Value *> EnterArgs; 2216 llvm::FunctionCallee ExitCallee; 2217 ArrayRef<llvm::Value *> ExitArgs; 2218 bool Conditional; 2219 llvm::BasicBlock *ContBlock = nullptr; 2220 2221 public: 2222 CommonActionTy(llvm::FunctionCallee EnterCallee, 2223 ArrayRef<llvm::Value *> EnterArgs, 2224 llvm::FunctionCallee ExitCallee, 2225 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2226 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2227 ExitArgs(ExitArgs), Conditional(Conditional) {} 2228 void Enter(CodeGenFunction &CGF) override { 2229 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2230 if (Conditional) { 2231 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2232 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2233 ContBlock = CGF.createBasicBlock("omp_if.end"); 2234 // Generate the branch (If-stmt) 2235 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2236 CGF.EmitBlock(ThenBlock); 2237 } 2238 } 2239 void Done(CodeGenFunction &CGF) { 2240 // Emit the rest of blocks/branches 2241 CGF.EmitBranch(ContBlock); 2242 CGF.EmitBlock(ContBlock, true); 2243 } 2244 void Exit(CodeGenFunction &CGF) override { 2245 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2246 } 2247 }; 2248 } // anonymous namespace 2249 2250 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2251 StringRef CriticalName, 2252 const RegionCodeGenTy &CriticalOpGen, 2253 SourceLocation Loc, const Expr *Hint) { 2254 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2255 // CriticalOpGen(); 2256 // __kmpc_end_critical(ident_t *, gtid, Lock); 2257 // Prepare arguments and build a call to __kmpc_critical 2258 if (!CGF.HaveInsertPoint()) 2259 return; 2260 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2261 getCriticalRegionLock(CriticalName)}; 2262 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2263 std::end(Args)); 2264 if (Hint) { 2265 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2266 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2267 } 2268 CommonActionTy Action( 2269 OMPBuilder.getOrCreateRuntimeFunction( 2270 CGM.getModule(), 2271 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2272 EnterArgs, 2273 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2274 OMPRTL___kmpc_end_critical), 2275 Args); 2276 CriticalOpGen.setAction(Action); 2277 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2278 } 2279 2280 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2281 const RegionCodeGenTy &MasterOpGen, 2282 SourceLocation Loc) { 2283 if (!CGF.HaveInsertPoint()) 2284 return; 2285 // if(__kmpc_master(ident_t *, gtid)) { 2286 // MasterOpGen(); 2287 // __kmpc_end_master(ident_t *, gtid); 2288 // } 2289 // Prepare arguments and build a call to __kmpc_master 2290 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2291 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2292 CGM.getModule(), OMPRTL___kmpc_master), 2293 Args, 2294 OMPBuilder.getOrCreateRuntimeFunction( 2295 CGM.getModule(), OMPRTL___kmpc_end_master), 2296 Args, 2297 /*Conditional=*/true); 2298 MasterOpGen.setAction(Action); 2299 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2300 Action.Done(CGF); 2301 } 2302 2303 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2304 const RegionCodeGenTy &MaskedOpGen, 2305 SourceLocation Loc, const Expr *Filter) { 2306 if (!CGF.HaveInsertPoint()) 2307 return; 2308 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2309 // MaskedOpGen(); 2310 // __kmpc_end_masked(iden_t *, gtid); 2311 // } 2312 // Prepare arguments and build a call to __kmpc_masked 2313 llvm::Value *FilterVal = Filter 2314 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2315 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2316 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2317 FilterVal}; 2318 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2319 getThreadID(CGF, Loc)}; 2320 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2321 CGM.getModule(), OMPRTL___kmpc_masked), 2322 Args, 2323 OMPBuilder.getOrCreateRuntimeFunction( 2324 CGM.getModule(), OMPRTL___kmpc_end_masked), 2325 ArgsEnd, 2326 /*Conditional=*/true); 2327 MaskedOpGen.setAction(Action); 2328 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2329 Action.Done(CGF); 2330 } 2331 2332 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2333 SourceLocation Loc) { 2334 if (!CGF.HaveInsertPoint()) 2335 return; 2336 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2337 OMPBuilder.createTaskyield(CGF.Builder); 2338 } else { 2339 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2340 llvm::Value *Args[] = { 2341 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2342 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2343 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2344 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2345 Args); 2346 } 2347 2348 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2349 Region->emitUntiedSwitch(CGF); 2350 } 2351 2352 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2353 const RegionCodeGenTy &TaskgroupOpGen, 2354 SourceLocation Loc) { 2355 if (!CGF.HaveInsertPoint()) 2356 return; 2357 // __kmpc_taskgroup(ident_t *, gtid); 2358 // TaskgroupOpGen(); 2359 // __kmpc_end_taskgroup(ident_t *, gtid); 2360 // Prepare arguments and build a call to __kmpc_taskgroup 2361 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2362 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2363 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2364 Args, 2365 
OMPBuilder.getOrCreateRuntimeFunction( 2366 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2367 Args); 2368 TaskgroupOpGen.setAction(Action); 2369 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2370 } 2371 2372 /// Given an array of pointers to variables, project the address of a 2373 /// given variable. 2374 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2375 unsigned Index, const VarDecl *Var) { 2376 // Pull out the pointer to the variable. 2377 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2378 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2379 2380 Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var)); 2381 Addr = CGF.Builder.CreateElementBitCast( 2382 Addr, CGF.ConvertTypeForMem(Var->getType())); 2383 return Addr; 2384 } 2385 2386 static llvm::Value *emitCopyprivateCopyFunction( 2387 CodeGenModule &CGM, llvm::Type *ArgsType, 2388 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2389 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2390 SourceLocation Loc) { 2391 ASTContext &C = CGM.getContext(); 2392 // void copy_func(void *LHSArg, void *RHSArg); 2393 FunctionArgList Args; 2394 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2395 ImplicitParamDecl::Other); 2396 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2397 ImplicitParamDecl::Other); 2398 Args.push_back(&LHSArg); 2399 Args.push_back(&RHSArg); 2400 const auto &CGFI = 2401 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2402 std::string Name = 2403 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2404 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2405 llvm::GlobalValue::InternalLinkage, Name, 2406 &CGM.getModule()); 2407 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2408 Fn->setDoesNotRecurse(); 2409 CodeGenFunction CGF(CGM); 2410 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  // Both incoming void* arguments are loaded and cast to ArgsType so they can
  // be indexed as arrays of pointers below.
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  // One copy is emitted per assignment op; slot I of each array supplies the
  // destination/source address for element I.
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the copyprivate variable itself, not from
    // the helper Dest/Src declarations.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, optionally followed by a copyprivate broadcast.
///
/// The pseudo-code comment in the body gives the emitted shape: the region
/// body runs under a conditional __kmpc_single / __kmpc_end_single pair, and
/// when \p CopyprivateVars is non-empty a did_it flag is maintained and passed
/// to __kmpc_copyprivate together with a generated copy function.
///
/// \param CGF Function being emitted into.
/// \param SingleOpGen Generator for the region body.
/// \param Loc Source location used to build the runtime location argument.
/// \param CopyprivateVars Copyprivate variable expressions; the remaining
///        three arrays are asserted to have the same length.
/// \param SrcExprs, DstExprs, AssignmentOps Forwarded to the generated copy
///        function.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
//   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid unless there are copyprivate variables; its validity
  // is what gates all copyprivate-related emission below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  // This store is emitted before Action.Done(CGF), matching the pseudo-code
  // above where "did_it = 1" sits inside the conditional block.
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // Array type void*[N], N being the number of copyprivate variables.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Store the address of each copyprivate variable, cast to void*, into its
    // slot of the list.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): the arrays are forwarded positionally, so SrcExprs binds to
    // the callee parameter named DestExprs and DstExprs to the one named
    // SrcExprs (see emitCopyprivateCopyFunction's signature) -- confirm the
    // naming here against this function's declaration.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region.
///
/// When \p IsThreads is true the region body is wrapped in a __kmpc_ordered /
/// __kmpc_end_ordered pair; otherwise the body is emitted as an inlined
/// directive with no runtime calls attached.
///
/// \param CGF Function being emitted into.
/// \param OrderedOpGen Generator for the region body.
/// \param Loc Source location used to build the runtime location argument.
/// \param IsThreads Selects whether the runtime enter/exit calls are emitted.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
OMPRTL___kmpc_ordered), 2542 Args, 2543 OMPBuilder.getOrCreateRuntimeFunction( 2544 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2545 Args); 2546 OrderedOpGen.setAction(Action); 2547 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2548 return; 2549 } 2550 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2551 } 2552 2553 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2554 unsigned Flags; 2555 if (Kind == OMPD_for) 2556 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2557 else if (Kind == OMPD_sections) 2558 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2559 else if (Kind == OMPD_single) 2560 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2561 else if (Kind == OMPD_barrier) 2562 Flags = OMP_IDENT_BARRIER_EXPL; 2563 else 2564 Flags = OMP_IDENT_BARRIER_IMPL; 2565 return Flags; 2566 } 2567 2568 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2569 CodeGenFunction &CGF, const OMPLoopDirective &S, 2570 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2571 // Check if the loop directive is actually a doacross loop directive. In this 2572 // case choose static, 1 schedule. 2573 if (llvm::any_of( 2574 S.getClausesOfKind<OMPOrderedClause>(), 2575 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2576 ScheduleKind = OMPC_SCHEDULE_static; 2577 // Chunk size is 1 in this case. 
llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for directive \p Kind.
///
/// With the OpenMPIRBuilder language option set, emission is delegated to
/// OMPBuilder.createBarrier and the builder's insertion point is restored from
/// its result. Otherwise either __kmpc_cancel_barrier or __kmpc_barrier is
/// called with (loc, thread_id): the cancel variant is used when the current
/// OpenMP region info reports hasCancel() and \p ForceSimpleCall is false.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used to build the runtime location argument.
/// \param Kind Directive kind; selects the location flags via
///        getDefaultFlagsForBarriers.
/// \param EmitChecks When the cancel variant is used, also emit a branch to
///        the cancel destination if the runtime call returns non-null.
/// \param ForceSimpleCall Always use __kmpc_barrier on the non-IRBuilder path.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  // (OMPRegionInfo is looked up first but is only consulted on the
  // non-IRBuilder path further down.)
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  // Note: this guard applies to the runtime-call path only; the IRBuilder
  // path above returns before reaching it.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
return; 2629 } 2630 } 2631 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2632 CGM.getModule(), OMPRTL___kmpc_barrier), 2633 Args); 2634 } 2635 2636 /// Map the OpenMP loop schedule to the runtime enumeration. 2637 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2638 bool Chunked, bool Ordered) { 2639 switch (ScheduleKind) { 2640 case OMPC_SCHEDULE_static: 2641 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2642 : (Ordered ? OMP_ord_static : OMP_sch_static); 2643 case OMPC_SCHEDULE_dynamic: 2644 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2645 case OMPC_SCHEDULE_guided: 2646 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2647 case OMPC_SCHEDULE_runtime: 2648 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2649 case OMPC_SCHEDULE_auto: 2650 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2651 case OMPC_SCHEDULE_unknown: 2652 assert(!Chunked && "chunk was specified but schedule kind not known"); 2653 return Ordered ? OMP_ord_static : OMP_sch_static; 2654 } 2655 llvm_unreachable("Unexpected runtime schedule"); 2656 } 2657 2658 /// Map the OpenMP distribute schedule to the runtime enumeration. 2659 static OpenMPSchedType 2660 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2661 // only static is allowed for dist_schedule 2662 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2663 } 2664 2665 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2666 bool Chunked) const { 2667 OpenMPSchedType Schedule = 2668 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2669 return Schedule == OMP_sch_static; 2670 } 2671 2672 bool CGOpenMPRuntime::isStaticNonchunked( 2673 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2674 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2675 return Schedule == OMP_dist_sch_static; 2676 } 2677 2678 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2679 bool Chunked) const { 2680 OpenMPSchedType Schedule = 2681 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2682 return Schedule == OMP_sch_static_chunked; 2683 } 2684 2685 bool CGOpenMPRuntime::isStaticChunked( 2686 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2687 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2688 return Schedule == OMP_dist_sch_static_chunked; 2689 } 2690 2691 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2692 OpenMPSchedType Schedule = 2693 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2694 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2695 return Schedule != OMP_sch_static; 2696 } 2697 2698 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2699 OpenMPScheduleClauseModifier M1, 2700 OpenMPScheduleClauseModifier M2) { 2701 int Modifier = 0; 2702 switch (M1) { 2703 case OMPC_SCHEDULE_MODIFIER_monotonic: 2704 Modifier = OMP_sch_modifier_monotonic; 2705 break; 2706 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2707 Modifier = OMP_sch_modifier_nonmonotonic; 2708 break; 2709 case OMPC_SCHEDULE_MODIFIER_simd: 2710 if (Schedule == OMP_sch_static_chunked) 2711 Schedule = OMP_sch_static_balanced_chunked; 2712 break; 2713 case 
OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // The second modifier is handled exactly like the first. Because it is
  // processed afterwards, a monotonic/nonmonotonic value here overwrites any
  // value taken from M1.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  // Only applied when no modifier was chosen above and the language level is
  // at least 50: every schedule not listed below gets the nonmonotonic bit.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  // The result is the (possibly adjusted) schedule value OR-ed with the
  // modifier bits.
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call for a loop that is not handled by the
/// static-init path.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used to build the runtime location argument.
/// \param ScheduleKind Schedule clause kind plus its two modifiers.
/// \param IVSize Bit width passed to createDispatchInitFunction and used for
///        the stride/default-chunk constants.
/// \param IVSigned Signedness passed to createDispatchInitFunction.
/// \param Ordered Whether the loop is ordered; unordered loops must not
///        resolve to a static schedule (asserted).
/// \param DispatchValues Lower bound, upper bound and optional chunk values.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the static-init runtime call for both the
/// worksharing-loop and the distribute variants.
///
/// \param CGF Function being emitted into.
/// \param UpdateLocation Pre-built location argument.
/// \param ThreadId Pre-built thread id argument.
/// \param ForStaticInitFunction Runtime function to call.
/// \param Schedule Runtime schedule; must be one of the static kinds
///        (asserted).
/// \param M1, M2 Schedule modifiers folded into the schedule argument.
/// \param Values Iteration-variable description and addresses of the
///        last-iteration flag, bounds and stride, plus the optional chunk.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule ==
OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // The runtime receives pointers to the last-iteration flag, bounds and
  // stride, and plain values for the increment (always 1) and the chunk.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init runtime call for a worksharing directive.
///
/// The location argument is flagged as a loop when \p DKind is a loop
/// directive and as sections otherwise; the call itself is emitted through
/// emitForStaticInitCall.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used to build the runtime location argument.
/// \param DKind Directive kind; asserted to be a worksharing directive.
/// \param ScheduleKind Schedule clause kind plus its two modifiers.
/// \param Values Static-init inputs forwarded to emitForStaticInitCall.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ?
OMP_IDENT_WORK_LOOP 2843 : OMP_IDENT_WORK_SECTIONS); 2844 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2845 llvm::FunctionCallee StaticInitFunction = 2846 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2847 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2848 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2849 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2850 } 2851 2852 void CGOpenMPRuntime::emitDistributeStaticInit( 2853 CodeGenFunction &CGF, SourceLocation Loc, 2854 OpenMPDistScheduleClauseKind SchedKind, 2855 const CGOpenMPRuntime::StaticRTInput &Values) { 2856 OpenMPSchedType ScheduleNum = 2857 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2858 llvm::Value *UpdatedLocation = 2859 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2860 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2861 llvm::FunctionCallee StaticInitFunction; 2862 bool isGPUDistribute = 2863 CGM.getLangOpts().OpenMPIsDevice && 2864 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2865 StaticInitFunction = createForStaticInitFunction( 2866 Values.IVSize, Values.IVSigned, isGPUDistribute); 2867 2868 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2869 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2870 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2871 } 2872 2873 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2874 SourceLocation Loc, 2875 OpenMPDirectiveKind DKind) { 2876 if (!CGF.HaveInsertPoint()) 2877 return; 2878 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2879 llvm::Value *Args[] = { 2880 emitUpdateLocation(CGF, Loc, 2881 isOpenMPDistributeDirective(DKind) 2882 ? OMP_IDENT_WORK_DISTRIBUTE 2883 : isOpenMPLoopDirective(DKind) 2884 ? 
OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  // Distribute directives compiled for an AMDGCN/NVPTX device use the
  // distribute-specific finalizer; everything else uses the generic one.
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

/// Emit the runtime call that ends an iteration of an ordered loop.
///
/// The callee is obtained from createDispatchFiniFunction for the given
/// induction-variable width and signedness and is called with (loc, tid).
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used to build the runtime location argument.
/// \param IVSize Bit width of the induction variable.
/// \param IVSigned Signedness of the induction variable.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // NOTE(review): the callee actually comes from createDispatchFiniFunction;
  // confirm the runtime entry name quoted above against that helper.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next runtime call and return its result as a bool.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used for the runtime location argument and the
///        scalar conversion.
/// \param IVSize Bit width of the induction variable.
/// \param IVSigned Signedness of the induction variable.
/// \param IL Address of the last-iteration flag.
/// \param LB Address of the lower bound.
/// \param UB Address of the upper bound.
/// \param ST Address of the stride.
/// \return The runtime call's result converted from a signed 32-bit integer
///         type to the context's bool type.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value
*NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  // NumThreads is cast to a signed 32-bit integer before being passed.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit the runtime call for a 'proc_bind' clause.
///
/// \param CGF Function being emitted into.
/// \param ProcBind Binding kind; must not be OMP_PROC_BIND_unknown (asserted).
///        Its numeric value is passed to the runtime as an int constant.
/// \param Loc Source location used to build the runtime location argument.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a 'flush'.
///
/// With the OpenMPIRBuilder language option set, emission is delegated to
/// OMPBuilder.createFlush; otherwise a call to __kmpc_flush(loc) is emitted.
/// The expression list (unnamed parameter) and \p AO are not used by this
/// implementation.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used to build the runtime location argument.
/// \param AO Atomic ordering; unused here.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    // Unlike most emitters in this file, the insert-point guard applies to
    // the runtime-call path only.
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true when neither target-region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Records a placeholder entry (null address and ID, target-region kind) at
/// position \p Order for the region identified by device ID, file ID, parent
/// function name and line number, and bumps the entry counter. Only valid
/// during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
///
/// On the device side the entry must already have been initialized and is
/// only filled in; on the host side a new entry is created and numbered with
/// the current entry count.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
3032 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3033 return; 3034 auto &Entry = 3035 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3036 Entry.setAddress(Addr); 3037 Entry.setID(ID); 3038 Entry.setFlags(Flags); 3039 } else { 3040 if (Flags == 3041 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3042 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3043 /*IgnoreAddressId*/ true)) 3044 return; 3045 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3046 "Target region entry already registered!"); 3047 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3048 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3049 ++OffloadingEntriesNum; 3050 } 3051 } 3052 3053 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3054 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3055 bool IgnoreAddressId) const { 3056 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3057 if (PerDevice == OffloadEntriesTargetRegion.end()) 3058 return false; 3059 auto PerFile = PerDevice->second.find(FileID); 3060 if (PerFile == PerDevice->second.end()) 3061 return false; 3062 auto PerParentName = PerFile->second.find(ParentName); 3063 if (PerParentName == PerFile->second.end()) 3064 return false; 3065 auto PerLine = PerParentName->second.find(LineNum); 3066 if (PerLine == PerParentName->second.end()) 3067 return false; 3068 // Fail if this entry is already registered. 3069 if (!IgnoreAddressId && 3070 (PerLine->second.getAddress() || PerLine->second.getID())) 3071 return false; 3072 return true; 3073 } 3074 3075 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3076 const OffloadTargetRegionEntryInfoActTy &Action) { 3077 // Scan all target region entries and perform the provided action. 
for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          // Arguments: device ID, file ID, parent name, line, entry.
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry.
///
/// Records an entry for \p Name with the given \p Order and \p Flags (if one
/// is not already present) and bumps the entry counter. Only valid during
/// device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  // Note: the counter is incremented unconditionally, whether or not
  // try_emplace inserted a new element.
  ++OffloadingEntriesNum;
}

/// Register address, size and linkage for a device global variable entry.
///
/// On the device side only previously initialized entries are updated; on the
/// host side an existing entry has its size/linkage filled in if the size is
/// still zero, and a missing entry is created and numbered with the current
/// entry count.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the hasDeviceGlobalVarEntryInfo(VarName) call below
    // repeats the guard a few lines above; it looks redundant -- confirm.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Address already set: only fill in size and linkage if still missing.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration on the host: create the entry with the next order
    // number.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry, passing
/// the entry's key and value.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create the global offload entry structure for \p Addr.
///
/// Emits an internal constant string holding the name of \p Addr and a
/// constant entry struct {ID, name, Size, Flags, 0}, placed in the
/// "omp_offloading_entries" section.
///
/// NOTE(review): the \p Linkage parameter is not referenced in the body; the
/// entry struct is always created with WeakAnyLinkage -- confirm intent.
///
/// \param ID Constant used as the first field, cast to void*.
/// \param Addr Constant whose name is used for the entry name string and the
///        entry symbol suffix.
/// \param Size Value stored in the size field.
/// \param Flags Value stored in the flags field.
/// \param Linkage See note above.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  // The name string lives in its own internal, constant global whose address
  // is marked as not significant.
  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field initializers, in order: ID (as void*), name string (as i8*), size,
  // flags, and a trailing 32-bit zero.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  // The entry symbol name is the generated prefix followed by the name of
  // Addr.
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
3184 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3185 return; 3186 3187 llvm::Module &M = CGM.getModule(); 3188 llvm::LLVMContext &C = M.getContext(); 3189 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3190 SourceLocation, StringRef>, 3191 16> 3192 OrderedEntries(OffloadEntriesInfoManager.size()); 3193 llvm::SmallVector<StringRef, 16> ParentFunctions( 3194 OffloadEntriesInfoManager.size()); 3195 3196 // Auxiliary methods to create metadata values and strings. 3197 auto &&GetMDInt = [this](unsigned V) { 3198 return llvm::ConstantAsMetadata::get( 3199 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3200 }; 3201 3202 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3203 3204 // Create the offloading info metadata node. 3205 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3206 3207 // Create function that emits metadata for each target region entry; 3208 auto &&TargetRegionMetadataEmitter = 3209 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3210 &GetMDString]( 3211 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3212 unsigned Line, 3213 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3214 // Generate metadata for target regions. Each entry of this metadata 3215 // contains: 3216 // - Entry 0 -> Kind of this type of metadata (0). 3217 // - Entry 1 -> Device ID of the file where the entry was identified. 3218 // - Entry 2 -> File ID of the file where the entry was identified. 3219 // - Entry 3 -> Mangled name of the function where the entry was 3220 // identified. 3221 // - Entry 4 -> Line in the file where the entry was identified. 3222 // - Entry 5 -> Order the entry was created. 3223 // The first element of the metadata node is the kind. 
3224 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3225 GetMDInt(FileID), GetMDString(ParentName), 3226 GetMDInt(Line), GetMDInt(E.getOrder())}; 3227 3228 SourceLocation Loc; 3229 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3230 E = CGM.getContext().getSourceManager().fileinfo_end(); 3231 I != E; ++I) { 3232 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3233 I->getFirst()->getUniqueID().getFile() == FileID) { 3234 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3235 I->getFirst(), Line, 1); 3236 break; 3237 } 3238 } 3239 // Save this entry in the right position of the ordered entries array. 3240 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3241 ParentFunctions[E.getOrder()] = ParentName; 3242 3243 // Add metadata to the named metadata node. 3244 MD->addOperand(llvm::MDNode::get(C, Ops)); 3245 }; 3246 3247 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3248 TargetRegionMetadataEmitter); 3249 3250 // Create function that emits metadata for each device global variable entry; 3251 auto &&DeviceGlobalVarMetadataEmitter = 3252 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3253 MD](StringRef MangledName, 3254 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3255 &E) { 3256 // Generate metadata for global variables. Each entry of this metadata 3257 // contains: 3258 // - Entry 0 -> Kind of this type of metadata (1). 3259 // - Entry 1 -> Mangled name of the variable. 3260 // - Entry 2 -> Declare target kind. 3261 // - Entry 3 -> Order the entry was created. 3262 // The first element of the metadata node is the kind. 3263 llvm::Metadata *Ops[] = { 3264 GetMDInt(E.getKind()), GetMDString(MangledName), 3265 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3266 3267 // Save this entry in the right position of the ordered entries array. 
3268 OrderedEntries[E.getOrder()] = 3269 std::make_tuple(&E, SourceLocation(), MangledName); 3270 3271 // Add metadata to the named metadata node. 3272 MD->addOperand(llvm::MDNode::get(C, Ops)); 3273 }; 3274 3275 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3276 DeviceGlobalVarMetadataEmitter); 3277 3278 for (const auto &E : OrderedEntries) { 3279 assert(std::get<0>(E) && "All ordered entries must exist!"); 3280 if (const auto *CE = 3281 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3282 std::get<0>(E))) { 3283 if (!CE->getID() || !CE->getAddress()) { 3284 // Do not blame the entry if the parent funtion is not emitted. 3285 StringRef FnName = ParentFunctions[CE->getOrder()]; 3286 if (!CGM.GetGlobalValue(FnName)) 3287 continue; 3288 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3289 DiagnosticsEngine::Error, 3290 "Offloading entry for target region in %0 is incorrect: either the " 3291 "address or the ID is invalid."); 3292 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3293 continue; 3294 } 3295 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3296 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3297 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3298 OffloadEntryInfoDeviceGlobalVar>( 3299 std::get<0>(E))) { 3300 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3301 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3302 CE->getFlags()); 3303 switch (Flags) { 3304 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3305 if (CGM.getLangOpts().OpenMPIsDevice && 3306 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3307 continue; 3308 if (!CE->getAddress()) { 3309 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3310 DiagnosticsEngine::Error, "Offloading entry for declare target " 3311 "variable %0 is incorrect: the " 3312 "address is invalid."); 3313 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3314 
continue; 3315 } 3316 // The vaiable has no definition - no need to add the entry. 3317 if (CE->getVarSize().isZero()) 3318 continue; 3319 break; 3320 } 3321 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3322 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3323 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3324 "Declaret target link address is set."); 3325 if (CGM.getLangOpts().OpenMPIsDevice) 3326 continue; 3327 if (!CE->getAddress()) { 3328 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3329 DiagnosticsEngine::Error, 3330 "Offloading entry for declare target variable is incorrect: the " 3331 "address is invalid."); 3332 CGM.getDiags().Report(DiagID); 3333 continue; 3334 } 3335 break; 3336 } 3337 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3338 CE->getVarSize().getQuantity(), Flags, 3339 CE->getLinkage()); 3340 } else { 3341 llvm_unreachable("Unsupported entry kind."); 3342 } 3343 } 3344 } 3345 3346 /// Loads all the offload entries information from the host IR 3347 /// metadata. 3348 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3349 // If we are in target mode, load the metadata from the host IR. This code has 3350 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3351 3352 if (!CGM.getLangOpts().OpenMPIsDevice) 3353 return; 3354 3355 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3356 return; 3357 3358 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3359 if (auto EC = Buf.getError()) { 3360 CGM.getDiags().Report(diag::err_cannot_open_file) 3361 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3362 return; 3363 } 3364 3365 llvm::LLVMContext C; 3366 auto ME = expectedToErrorOrAndEmitErrors( 3367 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3368 3369 if (auto EC = ME.getError()) { 3370 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3371 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3372 CGM.getDiags().Report(DiagID) 3373 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3374 return; 3375 } 3376 3377 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3378 if (!MD) 3379 return; 3380 3381 for (llvm::MDNode *MN : MD->operands()) { 3382 auto &&GetMDInt = [MN](unsigned Idx) { 3383 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3384 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3385 }; 3386 3387 auto &&GetMDString = [MN](unsigned Idx) { 3388 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3389 return V->getString(); 3390 }; 3391 3392 switch (GetMDInt(0)) { 3393 default: 3394 llvm_unreachable("Unexpected metadata!"); 3395 break; 3396 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3397 OffloadingEntryInfoTargetRegion: 3398 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3399 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3400 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3401 /*Order=*/GetMDInt(5)); 3402 break; 3403 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3404 OffloadingEntryInfoDeviceGlobalVar: 3405 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3406 /*MangledName=*/GetMDString(1), 3407 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3408 /*Flags=*/GetMDInt(2)), 3409 /*Order=*/GetMDInt(3)); 3410 break; 3411 } 3412 } 3413 } 3414 3415 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3416 if (!KmpRoutineEntryPtrTy) { 3417 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3418 ASTContext &C = CGM.getContext(); 3419 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3420 FunctionProtoType::ExtProtoInfo EPI; 3421 KmpRoutineEntryPtrQTy = C.getPointerType( 3422 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3423 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3424 } 3425 } 3426 3427 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3428 // Make sure the type of the entry is already created. This is the type we 3429 // have to create: 3430 // struct __tgt_offload_entry{ 3431 // void *addr; // Pointer to the offload entry info. 3432 // // (function or global) 3433 // char *name; // Name of the function or global. 3434 // size_t size; // Size of the entry info (0 if it a function). 3435 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3436 // int32_t reserved; // Reserved, to use by the runtime library. 
3437 // }; 3438 if (TgtOffloadEntryQTy.isNull()) { 3439 ASTContext &C = CGM.getContext(); 3440 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3441 RD->startDefinition(); 3442 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3443 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3444 addFieldToRecordDecl(C, RD, C.getSizeType()); 3445 addFieldToRecordDecl( 3446 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3447 addFieldToRecordDecl( 3448 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3449 RD->completeDefinition(); 3450 RD->addAttr(PackedAttr::CreateImplicit(C)); 3451 TgtOffloadEntryQTy = C.getRecordType(RD); 3452 } 3453 return TgtOffloadEntryQTy; 3454 } 3455 3456 namespace { 3457 struct PrivateHelpersTy { 3458 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3459 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3460 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3461 PrivateElemInit(PrivateElemInit) {} 3462 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3463 const Expr *OriginalRef = nullptr; 3464 const VarDecl *Original = nullptr; 3465 const VarDecl *PrivateCopy = nullptr; 3466 const VarDecl *PrivateElemInit = nullptr; 3467 bool isLocalPrivate() const { 3468 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3469 } 3470 }; 3471 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3472 } // anonymous namespace 3473 3474 static bool isAllocatableDecl(const VarDecl *VD) { 3475 const VarDecl *CVD = VD->getCanonicalDecl(); 3476 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3477 return false; 3478 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3479 // Use the default allocation. 
3480 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3481 !AA->getAllocator()); 3482 } 3483 3484 static RecordDecl * 3485 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3486 if (!Privates.empty()) { 3487 ASTContext &C = CGM.getContext(); 3488 // Build struct .kmp_privates_t. { 3489 // /* private vars */ 3490 // }; 3491 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3492 RD->startDefinition(); 3493 for (const auto &Pair : Privates) { 3494 const VarDecl *VD = Pair.second.Original; 3495 QualType Type = VD->getType().getNonReferenceType(); 3496 // If the private variable is a local variable with lvalue ref type, 3497 // allocate the pointer instead of the pointee type. 3498 if (Pair.second.isLocalPrivate()) { 3499 if (VD->getType()->isLValueReferenceType()) 3500 Type = C.getPointerType(Type); 3501 if (isAllocatableDecl(VD)) 3502 Type = C.getPointerType(Type); 3503 } 3504 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3505 if (VD->hasAttrs()) { 3506 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3507 E(VD->getAttrs().end()); 3508 I != E; ++I) 3509 FD->addAttr(*I); 3510 } 3511 } 3512 RD->completeDefinition(); 3513 return RD; 3514 } 3515 return nullptr; 3516 } 3517 3518 static RecordDecl * 3519 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3520 QualType KmpInt32Ty, 3521 QualType KmpRoutineEntryPointerQTy) { 3522 ASTContext &C = CGM.getContext(); 3523 // Build struct kmp_task_t { 3524 // void * shareds; 3525 // kmp_routine_entry_t routine; 3526 // kmp_int32 part_id; 3527 // kmp_cmplrdata_t data1; 3528 // kmp_cmplrdata_t data2; 3529 // For taskloops additional fields: 3530 // kmp_uint64 lb; 3531 // kmp_uint64 ub; 3532 // kmp_int64 st; 3533 // kmp_int32 liter; 3534 // void * reductions; 3535 // }; 3536 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3537 UD->startDefinition(); 3538 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3539 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3540 UD->completeDefinition(); 3541 QualType KmpCmplrdataTy = C.getRecordType(UD); 3542 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3543 RD->startDefinition(); 3544 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3545 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3546 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3547 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3548 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3549 if (isOpenMPTaskLoopDirective(Kind)) { 3550 QualType KmpUInt64Ty = 3551 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3552 QualType KmpInt64Ty = 3553 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3554 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3555 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3556 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3557 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3558 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3559 } 3560 RD->completeDefinition(); 3561 return RD; 3562 } 3563 3564 static RecordDecl * 3565 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3566 ArrayRef<PrivateDataTy> Privates) { 3567 ASTContext &C = CGM.getContext(); 3568 // Build struct kmp_task_t_with_privates { 3569 // kmp_task_t task_data; 3570 // .kmp_privates_t. privates; 3571 // }; 3572 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3573 RD->startDefinition(); 3574 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3575 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3576 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3577 RD->completeDefinition(); 3578 return RD; 3579 } 3580 3581 /// Emit a proxy function which accepts kmp_task_t as the second 3582 /// argument. 
3583 /// \code 3584 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3585 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3586 /// For taskloops: 3587 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3588 /// tt->reductions, tt->shareds); 3589 /// return 0; 3590 /// } 3591 /// \endcode 3592 static llvm::Function * 3593 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3594 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3595 QualType KmpTaskTWithPrivatesPtrQTy, 3596 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3597 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3598 llvm::Value *TaskPrivatesMap) { 3599 ASTContext &C = CGM.getContext(); 3600 FunctionArgList Args; 3601 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3602 ImplicitParamDecl::Other); 3603 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3604 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3605 ImplicitParamDecl::Other); 3606 Args.push_back(&GtidArg); 3607 Args.push_back(&TaskTypeArg); 3608 const auto &TaskEntryFnInfo = 3609 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3610 llvm::FunctionType *TaskEntryTy = 3611 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3612 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3613 auto *TaskEntry = llvm::Function::Create( 3614 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3615 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3616 TaskEntry->setDoesNotRecurse(); 3617 CodeGenFunction CGF(CGM); 3618 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3619 Loc, Loc); 3620 3621 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3622 // tt, 3623 // For taskloops: 3624 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3625 // 
tt->task_data.shareds); 3626 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3627 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3628 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3629 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3630 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3631 const auto *KmpTaskTWithPrivatesQTyRD = 3632 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3633 LValue Base = 3634 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3635 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3636 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3637 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3638 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3639 3640 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3641 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3642 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3643 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3644 CGF.ConvertTypeForMem(SharedsPtrTy)); 3645 3646 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3647 llvm::Value *PrivatesParam; 3648 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3649 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3650 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3651 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3652 } else { 3653 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3654 } 3655 3656 llvm::Value *CommonArgs[] = { 3657 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3658 CGF.Builder 3659 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3660 CGF.VoidPtrTy, CGF.Int8Ty) 3661 .getPointer()}; 3662 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3663 std::end(CommonArgs)); 3664 if (isOpenMPTaskLoopDirective(Kind)) { 3665 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3666 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3667 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3668 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3669 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3670 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3671 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3672 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3673 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3674 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3675 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3676 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3677 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3678 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3679 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3680 CallArgs.push_back(LBParam); 3681 CallArgs.push_back(UBParam); 3682 CallArgs.push_back(StParam); 3683 CallArgs.push_back(LIParam); 3684 CallArgs.push_back(RParam); 3685 } 3686 CallArgs.push_back(SharedsParam); 3687 3688 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3689 CallArgs); 3690 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3691 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3692 CGF.FinishFunction(); 3693 return TaskEntry; 3694 } 3695 3696 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3697 SourceLocation Loc, 3698 QualType KmpInt32Ty, 3699 QualType KmpTaskTWithPrivatesPtrQTy, 3700 QualType KmpTaskTWithPrivatesQTy) { 3701 ASTContext &C = CGM.getContext(); 3702 FunctionArgList Args; 3703 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3704 ImplicitParamDecl::Other); 3705 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3706 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3707 
ImplicitParamDecl::Other); 3708 Args.push_back(&GtidArg); 3709 Args.push_back(&TaskTypeArg); 3710 const auto &DestructorFnInfo = 3711 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3712 llvm::FunctionType *DestructorFnTy = 3713 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3714 std::string Name = 3715 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3716 auto *DestructorFn = 3717 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3718 Name, &CGM.getModule()); 3719 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3720 DestructorFnInfo); 3721 DestructorFn->setDoesNotRecurse(); 3722 CodeGenFunction CGF(CGM); 3723 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3724 Args, Loc, Loc); 3725 3726 LValue Base = CGF.EmitLoadOfPointerLValue( 3727 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3728 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3729 const auto *KmpTaskTWithPrivatesQTyRD = 3730 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3731 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3732 Base = CGF.EmitLValueForField(Base, *FI); 3733 for (const auto *Field : 3734 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3735 if (QualType::DestructionKind DtorKind = 3736 Field->getType().isDestructedType()) { 3737 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3738 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3739 } 3740 } 3741 CGF.FinishFunction(); 3742 return DestructorFn; 3743 } 3744 3745 /// Emit a privates mapping function for correct handling of private and 3746 /// firstprivate variables. 3747 /// \code 3748 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3749 /// **noalias priv1,..., <tyn> **noalias privn) { 3750 /// *priv1 = &.privates.priv1; 3751 /// ...; 3752 /// *privn = &.privates.privn; 3753 /// } 3754 /// \endcode 3755 static llvm::Value * 3756 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3757 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3758 ArrayRef<PrivateDataTy> Privates) { 3759 ASTContext &C = CGM.getContext(); 3760 FunctionArgList Args; 3761 ImplicitParamDecl TaskPrivatesArg( 3762 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3763 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3764 ImplicitParamDecl::Other); 3765 Args.push_back(&TaskPrivatesArg); 3766 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3767 unsigned Counter = 1; 3768 for (const Expr *E : Data.PrivateVars) { 3769 Args.push_back(ImplicitParamDecl::Create( 3770 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3771 C.getPointerType(C.getPointerType(E->getType())) 3772 .withConst() 3773 .withRestrict(), 3774 ImplicitParamDecl::Other)); 3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3776 PrivateVarsPos[VD] = Counter; 3777 ++Counter; 3778 } 3779 for (const Expr *E : Data.FirstprivateVars) { 3780 Args.push_back(ImplicitParamDecl::Create( 3781 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3782 C.getPointerType(C.getPointerType(E->getType())) 3783 .withConst() 3784 .withRestrict(), 3785 ImplicitParamDecl::Other)); 3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3787 PrivateVarsPos[VD] = Counter; 3788 ++Counter; 3789 } 3790 for (const Expr *E : Data.LastprivateVars) { 3791 Args.push_back(ImplicitParamDecl::Create( 3792 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3793 C.getPointerType(C.getPointerType(E->getType())) 3794 .withConst() 3795 .withRestrict(), 3796 ImplicitParamDecl::Other)); 3797 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3798 PrivateVarsPos[VD] = Counter; 3799 ++Counter; 3800 } 3801 for (const VarDecl *VD : 
Data.PrivateLocals) { 3802 QualType Ty = VD->getType().getNonReferenceType(); 3803 if (VD->getType()->isLValueReferenceType()) 3804 Ty = C.getPointerType(Ty); 3805 if (isAllocatableDecl(VD)) 3806 Ty = C.getPointerType(Ty); 3807 Args.push_back(ImplicitParamDecl::Create( 3808 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3809 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3810 ImplicitParamDecl::Other)); 3811 PrivateVarsPos[VD] = Counter; 3812 ++Counter; 3813 } 3814 const auto &TaskPrivatesMapFnInfo = 3815 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3816 llvm::FunctionType *TaskPrivatesMapTy = 3817 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3818 std::string Name = 3819 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3820 auto *TaskPrivatesMap = llvm::Function::Create( 3821 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3822 &CGM.getModule()); 3823 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3824 TaskPrivatesMapFnInfo); 3825 if (CGM.getLangOpts().Optimize) { 3826 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3827 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3828 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3829 } 3830 CodeGenFunction CGF(CGM); 3831 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3832 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3833 3834 // *privi = &.privates.privi; 3835 LValue Base = CGF.EmitLoadOfPointerLValue( 3836 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3837 TaskPrivatesArg.getType()->castAs<PointerType>()); 3838 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3839 Counter = 0; 3840 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3841 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3842 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3843 LValue RefLVal = 3844 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3845 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3846 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3847 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3848 ++Counter; 3849 } 3850 CGF.FinishFunction(); 3851 return TaskPrivatesMap; 3852 } 3853 3854 /// Emit initialization for private variables in task-based directives. 3855 static void emitPrivatesInit(CodeGenFunction &CGF, 3856 const OMPExecutableDirective &D, 3857 Address KmpTaskSharedsPtr, LValue TDBase, 3858 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3859 QualType SharedsTy, QualType SharedsPtrTy, 3860 const OMPTaskDataTy &Data, 3861 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3862 ASTContext &C = CGF.getContext(); 3863 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3864 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3865 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3866 ? OMPD_taskloop 3867 : OMPD_task; 3868 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3869 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3870 LValue SrcBase; 3871 bool IsTargetTask = 3872 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3873 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3874 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3875 // PointersArray, SizesArray, and MappersArray. The original variables for 3876 // these arrays are not captured and we get their addresses explicitly. 
3877 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3878 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3879 SrcBase = CGF.MakeAddrLValue( 3880 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3881 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3882 CGF.ConvertTypeForMem(SharedsTy)), 3883 SharedsTy); 3884 } 3885 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3886 for (const PrivateDataTy &Pair : Privates) { 3887 // Do not initialize private locals. 3888 if (Pair.second.isLocalPrivate()) { 3889 ++FI; 3890 continue; 3891 } 3892 const VarDecl *VD = Pair.second.PrivateCopy; 3893 const Expr *Init = VD->getAnyInitializer(); 3894 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3895 !CGF.isTrivialInitializer(Init)))) { 3896 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3897 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3898 const VarDecl *OriginalVD = Pair.second.Original; 3899 // Check if the variable is the target-based BasePointersArray, 3900 // PointersArray, SizesArray, or MappersArray. 
3901 LValue SharedRefLValue; 3902 QualType Type = PrivateLValue.getType(); 3903 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3904 if (IsTargetTask && !SharedField) { 3905 assert(isa<ImplicitParamDecl>(OriginalVD) && 3906 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3907 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3908 ->getNumParams() == 0 && 3909 isa<TranslationUnitDecl>( 3910 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3911 ->getDeclContext()) && 3912 "Expected artificial target data variable."); 3913 SharedRefLValue = 3914 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3915 } else if (ForDup) { 3916 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3917 SharedRefLValue = CGF.MakeAddrLValue( 3918 SharedRefLValue.getAddress(CGF).withAlignment( 3919 C.getDeclAlign(OriginalVD)), 3920 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3921 SharedRefLValue.getTBAAInfo()); 3922 } else if (CGF.LambdaCaptureFields.count( 3923 Pair.second.Original->getCanonicalDecl()) > 0 || 3924 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3925 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3926 } else { 3927 // Processing for implicitly captured variables. 3928 InlinedOpenMPRegionRAII Region( 3929 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3930 /*HasCancel=*/false, /*NoInheritance=*/true); 3931 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3932 } 3933 if (Type->isArrayType()) { 3934 // Initialize firstprivate array. 3935 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3936 // Perform simple memcpy. 3937 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3938 } else { 3939 // Initialize firstprivate array using element-by-element 3940 // initialization. 
3941 CGF.EmitOMPAggregateAssign( 3942 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3943 Type, 3944 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3945 Address SrcElement) { 3946 // Clean up any temporaries needed by the initialization. 3947 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3948 InitScope.addPrivate(Elem, SrcElement); 3949 (void)InitScope.Privatize(); 3950 // Emit initialization for single element. 3951 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3952 CGF, &CapturesInfo); 3953 CGF.EmitAnyExprToMem(Init, DestElement, 3954 Init->getType().getQualifiers(), 3955 /*IsInitializer=*/false); 3956 }); 3957 } 3958 } else { 3959 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3960 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3961 (void)InitScope.Privatize(); 3962 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3963 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3964 /*capturedByInit=*/false); 3965 } 3966 } else { 3967 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3968 } 3969 } 3970 ++FI; 3971 } 3972 } 3973 3974 /// Check if duplication function is required for taskloops. 
3975 static bool checkInitIsRequired(CodeGenFunction &CGF, 3976 ArrayRef<PrivateDataTy> Privates) { 3977 bool InitRequired = false; 3978 for (const PrivateDataTy &Pair : Privates) { 3979 if (Pair.second.isLocalPrivate()) 3980 continue; 3981 const VarDecl *VD = Pair.second.PrivateCopy; 3982 const Expr *Init = VD->getAnyInitializer(); 3983 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3984 !CGF.isTrivialInitializer(Init)); 3985 if (InitRequired) 3986 break; 3987 } 3988 return InitRequired; 3989 } 3990 3991 3992 /// Emit task_dup function (for initialization of 3993 /// private/firstprivate/lastprivate vars and last_iter flag) 3994 /// \code 3995 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3996 /// lastpriv) { 3997 /// // setup lastprivate flag 3998 /// task_dst->last = lastpriv; 3999 /// // could be constructor calls here... 4000 /// } 4001 /// \endcode 4002 static llvm::Value * 4003 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4004 const OMPExecutableDirective &D, 4005 QualType KmpTaskTWithPrivatesPtrQTy, 4006 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4007 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4008 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4009 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4010 ASTContext &C = CGM.getContext(); 4011 FunctionArgList Args; 4012 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4013 KmpTaskTWithPrivatesPtrQTy, 4014 ImplicitParamDecl::Other); 4015 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4016 KmpTaskTWithPrivatesPtrQTy, 4017 ImplicitParamDecl::Other); 4018 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4019 ImplicitParamDecl::Other); 4020 Args.push_back(&DstArg); 4021 Args.push_back(&SrcArg); 4022 Args.push_back(&LastprivArg); 4023 const auto &TaskDupFnInfo = 4024 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4025 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4026 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4027 auto *TaskDup = llvm::Function::Create( 4028 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4029 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4030 TaskDup->setDoesNotRecurse(); 4031 CodeGenFunction CGF(CGM); 4032 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4033 Loc); 4034 4035 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4036 CGF.GetAddrOfLocalVar(&DstArg), 4037 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4038 // task_dst->liter = lastpriv; 4039 if (WithLastIter) { 4040 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4041 LValue Base = CGF.EmitLValueForField( 4042 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4043 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4044 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4045 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4046 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4047 } 4048 4049 // Emit initial values for private copies (if any). 
4050 assert(!Privates.empty()); 4051 Address KmpTaskSharedsPtr = Address::invalid(); 4052 if (!Data.FirstprivateVars.empty()) { 4053 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4054 CGF.GetAddrOfLocalVar(&SrcArg), 4055 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4056 LValue Base = CGF.EmitLValueForField( 4057 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4058 KmpTaskSharedsPtr = Address::deprecated( 4059 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4060 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4061 KmpTaskTShareds)), 4062 Loc), 4063 CGM.getNaturalTypeAlignment(SharedsTy)); 4064 } 4065 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4066 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4067 CGF.FinishFunction(); 4068 return TaskDup; 4069 } 4070 4071 /// Checks if destructor function is required to be generated. 4072 /// \return true if cleanups are required, false otherwise. 4073 static bool 4074 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4075 ArrayRef<PrivateDataTy> Privates) { 4076 for (const PrivateDataTy &P : Privates) { 4077 if (P.second.isLocalPrivate()) 4078 continue; 4079 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4080 if (Ty.isDestructedType()) 4081 return true; 4082 } 4083 return false; 4084 } 4085 4086 namespace { 4087 /// Loop generator for OpenMP iterator expression. 
4088 class OMPIteratorGeneratorScope final 4089 : public CodeGenFunction::OMPPrivateScope { 4090 CodeGenFunction &CGF; 4091 const OMPIteratorExpr *E = nullptr; 4092 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4093 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4094 OMPIteratorGeneratorScope() = delete; 4095 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4096 4097 public: 4098 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4099 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4100 if (!E) 4101 return; 4102 SmallVector<llvm::Value *, 4> Uppers; 4103 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4104 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4105 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4106 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); 4107 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4108 addPrivate( 4109 HelperData.CounterVD, 4110 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); 4111 } 4112 Privatize(); 4113 4114 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4115 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4116 LValue CLVal = 4117 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4118 HelperData.CounterVD->getType()); 4119 // Counter = 0; 4120 CGF.EmitStoreOfScalar( 4121 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4122 CLVal); 4123 CodeGenFunction::JumpDest &ContDest = 4124 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4125 CodeGenFunction::JumpDest &ExitDest = 4126 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4127 // N = <number-of_iterations>; 4128 llvm::Value *N = Uppers[I]; 4129 // cont: 4130 // if (Counter < N) goto body; else goto exit; 4131 CGF.EmitBlock(ContDest.getBlock()); 4132 auto *CVal = 4133 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4134 
llvm::Value *Cmp = 4135 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4136 ? CGF.Builder.CreateICmpSLT(CVal, N) 4137 : CGF.Builder.CreateICmpULT(CVal, N); 4138 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4139 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4140 // body: 4141 CGF.EmitBlock(BodyBB); 4142 // Iteri = Begini + Counter * Stepi; 4143 CGF.EmitIgnoredExpr(HelperData.Update); 4144 } 4145 } 4146 ~OMPIteratorGeneratorScope() { 4147 if (!E) 4148 return; 4149 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4150 // Counter = Counter + 1; 4151 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4152 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4153 // goto cont; 4154 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4155 // exit: 4156 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4157 } 4158 } 4159 }; 4160 } // namespace 4161 4162 static std::pair<llvm::Value *, llvm::Value *> 4163 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4164 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4165 llvm::Value *Addr; 4166 if (OASE) { 4167 const Expr *Base = OASE->getBase(); 4168 Addr = CGF.EmitScalarExpr(Base); 4169 } else { 4170 Addr = CGF.EmitLValue(E).getPointer(CGF); 4171 } 4172 llvm::Value *SizeVal; 4173 QualType Ty = E->getType(); 4174 if (OASE) { 4175 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4176 for (const Expr *SE : OASE->getDimensions()) { 4177 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4178 Sz = CGF.EmitScalarConversion( 4179 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4180 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4181 } 4182 } else if (const auto *ASE = 4183 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4184 LValue UpAddrLVal = 4185 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4186 Address UpAddrAddress = UpAddrLVal.getAddress(CGF); 4187 llvm::Value *UpAddr = 
CGF.Builder.CreateConstGEP1_32( 4188 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4189 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4190 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4191 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4192 } else { 4193 SizeVal = CGF.getTypeSize(Ty); 4194 } 4195 return std::make_pair(Addr, SizeVal); 4196 } 4197 4198 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4199 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4200 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4201 if (KmpTaskAffinityInfoTy.isNull()) { 4202 RecordDecl *KmpAffinityInfoRD = 4203 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4204 KmpAffinityInfoRD->startDefinition(); 4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4206 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4207 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4208 KmpAffinityInfoRD->completeDefinition(); 4209 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4210 } 4211 } 4212 4213 CGOpenMPRuntime::TaskResultTy 4214 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4215 const OMPExecutableDirective &D, 4216 llvm::Function *TaskFunction, QualType SharedsTy, 4217 Address Shareds, const OMPTaskDataTy &Data) { 4218 ASTContext &C = CGM.getContext(); 4219 llvm::SmallVector<PrivateDataTy, 4> Privates; 4220 // Aggregate privates and sort them by the alignment. 
4221 const auto *I = Data.PrivateCopies.begin(); 4222 for (const Expr *E : Data.PrivateVars) { 4223 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4224 Privates.emplace_back( 4225 C.getDeclAlign(VD), 4226 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4227 /*PrivateElemInit=*/nullptr)); 4228 ++I; 4229 } 4230 I = Data.FirstprivateCopies.begin(); 4231 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4232 for (const Expr *E : Data.FirstprivateVars) { 4233 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4234 Privates.emplace_back( 4235 C.getDeclAlign(VD), 4236 PrivateHelpersTy( 4237 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4238 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4239 ++I; 4240 ++IElemInitRef; 4241 } 4242 I = Data.LastprivateCopies.begin(); 4243 for (const Expr *E : Data.LastprivateVars) { 4244 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4245 Privates.emplace_back( 4246 C.getDeclAlign(VD), 4247 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4248 /*PrivateElemInit=*/nullptr)); 4249 ++I; 4250 } 4251 for (const VarDecl *VD : Data.PrivateLocals) { 4252 if (isAllocatableDecl(VD)) 4253 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4254 else 4255 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4256 } 4257 llvm::stable_sort(Privates, 4258 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4259 return L.first > R.first; 4260 }); 4261 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4262 // Build type kmp_routine_entry_t (if not built yet). 4263 emitKmpRoutineEntryT(KmpInt32Ty); 4264 // Build type kmp_task_t (if not built yet). 
4265 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4266 if (SavedKmpTaskloopTQTy.isNull()) { 4267 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4268 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4269 } 4270 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4271 } else { 4272 assert((D.getDirectiveKind() == OMPD_task || 4273 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4274 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4275 "Expected taskloop, task or target directive"); 4276 if (SavedKmpTaskTQTy.isNull()) { 4277 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4278 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4279 } 4280 KmpTaskTQTy = SavedKmpTaskTQTy; 4281 } 4282 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4283 // Build particular struct kmp_task_t for the given task. 4284 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4285 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4286 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4287 QualType KmpTaskTWithPrivatesPtrQTy = 4288 C.getPointerType(KmpTaskTWithPrivatesQTy); 4289 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4290 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4291 KmpTaskTWithPrivatesTy->getPointerTo(); 4292 llvm::Value *KmpTaskTWithPrivatesTySize = 4293 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4294 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4295 4296 // Emit initial values for private copies (if any). 
4297 llvm::Value *TaskPrivatesMap = nullptr; 4298 llvm::Type *TaskPrivatesMapTy = 4299 std::next(TaskFunction->arg_begin(), 3)->getType(); 4300 if (!Privates.empty()) { 4301 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4302 TaskPrivatesMap = 4303 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4304 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4305 TaskPrivatesMap, TaskPrivatesMapTy); 4306 } else { 4307 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4308 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4309 } 4310 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4311 // kmp_task_t *tt); 4312 llvm::Function *TaskEntry = emitProxyTaskFunction( 4313 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4314 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4315 TaskPrivatesMap); 4316 4317 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4318 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4319 // kmp_routine_entry_t *task_entry); 4320 // Task flags. Format is taken from 4321 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4322 // description of kmp_tasking_flags struct. 4323 enum { 4324 TiedFlag = 0x1, 4325 FinalFlag = 0x2, 4326 DestructorsFlag = 0x8, 4327 PriorityFlag = 0x20, 4328 DetachableFlag = 0x40, 4329 }; 4330 unsigned Flags = Data.Tied ? TiedFlag : 0; 4331 bool NeedsCleanup = false; 4332 if (!Privates.empty()) { 4333 NeedsCleanup = 4334 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4335 if (NeedsCleanup) 4336 Flags = Flags | DestructorsFlag; 4337 } 4338 if (Data.Priority.getInt()) 4339 Flags = Flags | PriorityFlag; 4340 if (D.hasClausesOfKind<OMPDetachClause>()) 4341 Flags = Flags | DetachableFlag; 4342 llvm::Value *TaskFlags = 4343 Data.Final.getPointer() 4344 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4345 CGF.Builder.getInt32(FinalFlag), 4346 CGF.Builder.getInt32(/*C=*/0)) 4347 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4348 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4349 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4350 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4351 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4352 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4353 TaskEntry, KmpRoutineEntryPtrTy)}; 4354 llvm::Value *NewTask; 4355 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4356 // Check if we have any device clause associated with the directive. 4357 const Expr *Device = nullptr; 4358 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4359 Device = C->getDevice(); 4360 // Emit device ID if any otherwise use default value. 4361 llvm::Value *DeviceID; 4362 if (Device) 4363 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4364 CGF.Int64Ty, /*isSigned=*/true); 4365 else 4366 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4367 AllocArgs.push_back(DeviceID); 4368 NewTask = CGF.EmitRuntimeCall( 4369 OMPBuilder.getOrCreateRuntimeFunction( 4370 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4371 AllocArgs); 4372 } else { 4373 NewTask = 4374 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4375 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4376 AllocArgs); 4377 } 4378 // Emit detach clause initialization. 
4379 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4380 // task_descriptor); 4381 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4382 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4383 LValue EvtLVal = CGF.EmitLValue(Evt); 4384 4385 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4386 // int gtid, kmp_task_t *task); 4387 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4388 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4389 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4390 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4391 OMPBuilder.getOrCreateRuntimeFunction( 4392 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4393 {Loc, Tid, NewTask}); 4394 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4395 Evt->getExprLoc()); 4396 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4397 } 4398 // Process affinity clauses. 4399 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4400 // Process list of affinity data. 4401 ASTContext &C = CGM.getContext(); 4402 Address AffinitiesArray = Address::invalid(); 4403 // Calculate number of elements to form the array of affinity data. 4404 llvm::Value *NumOfElements = nullptr; 4405 unsigned NumAffinities = 0; 4406 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4407 if (const Expr *Modifier = C->getModifier()) { 4408 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4409 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4410 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4411 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4412 NumOfElements = 4413 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4414 } 4415 } else { 4416 NumAffinities += C->varlist_size(); 4417 } 4418 } 4419 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4420 // Fields ids in kmp_task_affinity_info record. 
4421 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4422 4423 QualType KmpTaskAffinityInfoArrayTy; 4424 if (NumOfElements) { 4425 NumOfElements = CGF.Builder.CreateNUWAdd( 4426 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4427 auto *OVE = new (C) OpaqueValueExpr( 4428 Loc, 4429 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4430 VK_PRValue); 4431 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4432 RValue::get(NumOfElements)); 4433 KmpTaskAffinityInfoArrayTy = 4434 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4435 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4436 // Properly emit variable-sized array. 4437 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4438 ImplicitParamDecl::Other); 4439 CGF.EmitVarDecl(*PD); 4440 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4441 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4442 /*isSigned=*/false); 4443 } else { 4444 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4445 KmpTaskAffinityInfoTy, 4446 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4447 ArrayType::Normal, /*IndexTypeQuals=*/0); 4448 AffinitiesArray = 4449 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4450 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4451 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4452 /*isSigned=*/false); 4453 } 4454 4455 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4456 // Fill array by elements without iterators. 
4457 unsigned Pos = 0; 4458 bool HasIterator = false; 4459 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4460 if (C->getModifier()) { 4461 HasIterator = true; 4462 continue; 4463 } 4464 for (const Expr *E : C->varlists()) { 4465 llvm::Value *Addr; 4466 llvm::Value *Size; 4467 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4468 LValue Base = 4469 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4470 KmpTaskAffinityInfoTy); 4471 // affs[i].base_addr = &<Affinities[i].second>; 4472 LValue BaseAddrLVal = CGF.EmitLValueForField( 4473 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4474 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4475 BaseAddrLVal); 4476 // affs[i].len = sizeof(<Affinities[i].second>); 4477 LValue LenLVal = CGF.EmitLValueForField( 4478 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4479 CGF.EmitStoreOfScalar(Size, LenLVal); 4480 ++Pos; 4481 } 4482 } 4483 LValue PosLVal; 4484 if (HasIterator) { 4485 PosLVal = CGF.MakeAddrLValue( 4486 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4487 C.getSizeType()); 4488 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4489 } 4490 // Process elements with iterators. 
4491 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4492 const Expr *Modifier = C->getModifier(); 4493 if (!Modifier) 4494 continue; 4495 OMPIteratorGeneratorScope IteratorScope( 4496 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4497 for (const Expr *E : C->varlists()) { 4498 llvm::Value *Addr; 4499 llvm::Value *Size; 4500 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4501 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4502 LValue Base = CGF.MakeAddrLValue( 4503 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4504 // affs[i].base_addr = &<Affinities[i].second>; 4505 LValue BaseAddrLVal = CGF.EmitLValueForField( 4506 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4507 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4508 BaseAddrLVal); 4509 // affs[i].len = sizeof(<Affinities[i].second>); 4510 LValue LenLVal = CGF.EmitLValueForField( 4511 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4512 CGF.EmitStoreOfScalar(Size, LenLVal); 4513 Idx = CGF.Builder.CreateNUWAdd( 4514 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4515 CGF.EmitStoreOfScalar(Idx, PosLVal); 4516 } 4517 } 4518 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4519 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4520 // naffins, kmp_task_affinity_info_t *affin_list); 4521 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4522 llvm::Value *GTid = getThreadID(CGF, Loc); 4523 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4524 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4525 // FIXME: Emit the function and ignore its result for now unless the 4526 // runtime function is properly implemented. 
4527 (void)CGF.EmitRuntimeCall( 4528 OMPBuilder.getOrCreateRuntimeFunction( 4529 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4530 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4531 } 4532 llvm::Value *NewTaskNewTaskTTy = 4533 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4534 NewTask, KmpTaskTWithPrivatesPtrTy); 4535 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4536 KmpTaskTWithPrivatesQTy); 4537 LValue TDBase = 4538 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4539 // Fill the data in the resulting kmp_task_t record. 4540 // Copy shareds if there are any. 4541 Address KmpTaskSharedsPtr = Address::invalid(); 4542 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4543 KmpTaskSharedsPtr = Address::deprecated( 4544 CGF.EmitLoadOfScalar( 4545 CGF.EmitLValueForField( 4546 TDBase, 4547 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 4548 Loc), 4549 CGM.getNaturalTypeAlignment(SharedsTy)); 4550 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4551 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4552 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4553 } 4554 // Emit initial values for private copies (if any). 4555 TaskResultTy Result; 4556 if (!Privates.empty()) { 4557 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4558 SharedsTy, SharedsPtrTy, Data, Privates, 4559 /*ForDup=*/false); 4560 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4561 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4562 Result.TaskDupFn = emitTaskDupFunction( 4563 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4564 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4565 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4566 } 4567 } 4568 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4569 enum { Priority = 0, Destructors = 1 }; 4570 // Provide pointer to function with destructors for privates. 4571 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4572 const RecordDecl *KmpCmplrdataUD = 4573 (*FI)->getType()->getAsUnionType()->getDecl(); 4574 if (NeedsCleanup) { 4575 llvm::Value *DestructorFn = emitDestructorsFunction( 4576 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4577 KmpTaskTWithPrivatesQTy); 4578 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4579 LValue DestructorsLV = CGF.EmitLValueForField( 4580 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4581 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4582 DestructorFn, KmpRoutineEntryPtrTy), 4583 DestructorsLV); 4584 } 4585 // Set priority. 4586 if (Data.Priority.getInt()) { 4587 LValue Data2LV = CGF.EmitLValueForField( 4588 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4589 LValue PriorityLV = CGF.EmitLValueForField( 4590 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4591 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4592 } 4593 Result.NewTask = NewTask; 4594 Result.TaskEntry = TaskEntry; 4595 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4596 Result.TDBase = TDBase; 4597 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4598 return Result; 4599 } 4600 4601 namespace { 4602 /// Dependence kind for RTL. 4603 enum RTLDependenceKindTy { 4604 DepIn = 0x01, 4605 DepInOut = 0x3, 4606 DepMutexInOutSet = 0x4, 4607 DepInOutSet = 0x8 4608 }; 4609 /// Fields ids in kmp_depend_info record. 4610 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4611 } // namespace 4612 4613 /// Translates internal dependency kind into the runtime kind. 4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4615 RTLDependenceKindTy DepKind; 4616 switch (K) { 4617 case OMPC_DEPEND_in: 4618 DepKind = DepIn; 4619 break; 4620 // Out and InOut dependencies must use the same code. 
4621 case OMPC_DEPEND_out: 4622 case OMPC_DEPEND_inout: 4623 DepKind = DepInOut; 4624 break; 4625 case OMPC_DEPEND_mutexinoutset: 4626 DepKind = DepMutexInOutSet; 4627 break; 4628 case OMPC_DEPEND_inoutset: 4629 DepKind = DepInOutSet; 4630 break; 4631 case OMPC_DEPEND_source: 4632 case OMPC_DEPEND_sink: 4633 case OMPC_DEPEND_depobj: 4634 case OMPC_DEPEND_unknown: 4635 llvm_unreachable("Unknown task dependence type"); 4636 } 4637 return DepKind; 4638 } 4639 4640 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4641 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4642 QualType &FlagsTy) { 4643 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4644 if (KmpDependInfoTy.isNull()) { 4645 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4646 KmpDependInfoRD->startDefinition(); 4647 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4648 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4649 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4650 KmpDependInfoRD->completeDefinition(); 4651 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4652 } 4653 } 4654 4655 std::pair<llvm::Value *, LValue> 4656 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4657 SourceLocation Loc) { 4658 ASTContext &C = CGM.getContext(); 4659 QualType FlagsTy; 4660 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4661 RecordDecl *KmpDependInfoRD = 4662 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4663 LValue Base = CGF.EmitLoadOfPointerLValue( 4664 DepobjLVal.getAddress(CGF), 4665 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4666 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4667 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4668 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4669 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4670 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, 
Base.getBaseInfo(), 4671 Base.getTBAAInfo()); 4672 Address DepObjAddr = CGF.Builder.CreateGEP( 4673 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4674 LValue NumDepsBase = CGF.MakeAddrLValue( 4675 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4676 // NumDeps = deps[i].base_addr; 4677 LValue BaseAddrLVal = CGF.EmitLValueForField( 4678 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4679 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4680 return std::make_pair(NumDeps, Base); 4681 } 4682 4683 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4684 llvm::PointerUnion<unsigned *, LValue *> Pos, 4685 const OMPTaskDataTy::DependData &Data, 4686 Address DependenciesArray) { 4687 CodeGenModule &CGM = CGF.CGM; 4688 ASTContext &C = CGM.getContext(); 4689 QualType FlagsTy; 4690 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4691 RecordDecl *KmpDependInfoRD = 4692 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4693 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4694 4695 OMPIteratorGeneratorScope IteratorScope( 4696 CGF, cast_or_null<OMPIteratorExpr>( 4697 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4698 : nullptr)); 4699 for (const Expr *E : Data.DepExprs) { 4700 llvm::Value *Addr; 4701 llvm::Value *Size; 4702 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4703 LValue Base; 4704 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4705 Base = CGF.MakeAddrLValue( 4706 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4707 } else { 4708 LValue &PosLVal = *Pos.get<LValue *>(); 4709 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4710 Base = CGF.MakeAddrLValue( 4711 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4712 } 4713 // deps[i].base_addr = &<Dependencies[i].second>; 4714 LValue BaseAddrLVal = CGF.EmitLValueForField( 4715 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4716 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4717 BaseAddrLVal); 4718 // deps[i].len = sizeof(<Dependencies[i].second>); 4719 LValue LenLVal = CGF.EmitLValueForField( 4720 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4721 CGF.EmitStoreOfScalar(Size, LenLVal); 4722 // deps[i].flags = <Dependencies[i].first>; 4723 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4724 LValue FlagsLVal = CGF.EmitLValueForField( 4725 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4726 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4727 FlagsLVal); 4728 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4729 ++(*P); 4730 } else { 4731 LValue &PosLVal = *Pos.get<LValue *>(); 4732 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4733 Idx = CGF.Builder.CreateNUWAdd(Idx, 4734 llvm::ConstantInt::get(Idx->getType(), 1)); 4735 CGF.EmitStoreOfScalar(Idx, PosLVal); 4736 } 4737 } 4738 } 4739 4740 static SmallVector<llvm::Value *, 4> 4741 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4742 const OMPTaskDataTy::DependData &Data) { 4743 assert(Data.DepKind == OMPC_DEPEND_depobj && 4744 
"Expected depobj dependecy kind."); 4745 SmallVector<llvm::Value *, 4> Sizes; 4746 SmallVector<LValue, 4> SizeLVals; 4747 ASTContext &C = CGF.getContext(); 4748 QualType FlagsTy; 4749 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4750 RecordDecl *KmpDependInfoRD = 4751 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4752 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4753 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4754 { 4755 OMPIteratorGeneratorScope IteratorScope( 4756 CGF, cast_or_null<OMPIteratorExpr>( 4757 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4758 : nullptr)); 4759 for (const Expr *E : Data.DepExprs) { 4760 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4761 LValue Base = CGF.EmitLoadOfPointerLValue( 4762 DepobjLVal.getAddress(CGF), 4763 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4764 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4765 Base.getAddress(CGF), KmpDependInfoPtrT, 4766 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4767 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4768 Base.getTBAAInfo()); 4769 Address DepObjAddr = CGF.Builder.CreateGEP( 4770 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4771 LValue NumDepsBase = CGF.MakeAddrLValue( 4772 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4773 // NumDeps = deps[i].base_addr; 4774 LValue BaseAddrLVal = CGF.EmitLValueForField( 4775 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4776 llvm::Value *NumDeps = 4777 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4778 LValue NumLVal = CGF.MakeAddrLValue( 4779 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4780 C.getUIntPtrType()); 4781 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4782 NumLVal.getAddress(CGF)); 4783 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4784 llvm::Value *Add = 
CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4785 CGF.EmitStoreOfScalar(Add, NumLVal); 4786 SizeLVals.push_back(NumLVal); 4787 } 4788 } 4789 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4790 llvm::Value *Size = 4791 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4792 Sizes.push_back(Size); 4793 } 4794 return Sizes; 4795 } 4796 4797 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4798 LValue PosLVal, 4799 const OMPTaskDataTy::DependData &Data, 4800 Address DependenciesArray) { 4801 assert(Data.DepKind == OMPC_DEPEND_depobj && 4802 "Expected depobj dependecy kind."); 4803 ASTContext &C = CGF.getContext(); 4804 QualType FlagsTy; 4805 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4806 RecordDecl *KmpDependInfoRD = 4807 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4808 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4809 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4810 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4811 { 4812 OMPIteratorGeneratorScope IteratorScope( 4813 CGF, cast_or_null<OMPIteratorExpr>( 4814 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4815 : nullptr)); 4816 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4817 const Expr *E = Data.DepExprs[I]; 4818 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4819 LValue Base = CGF.EmitLoadOfPointerLValue( 4820 DepobjLVal.getAddress(CGF), 4821 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4822 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4823 Base.getAddress(CGF), KmpDependInfoPtrT, 4824 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4825 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4826 Base.getTBAAInfo()); 4827 4828 // Get number of elements in a single depobj. 
4829 Address DepObjAddr = CGF.Builder.CreateGEP( 4830 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4831 LValue NumDepsBase = CGF.MakeAddrLValue( 4832 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4833 // NumDeps = deps[i].base_addr; 4834 LValue BaseAddrLVal = CGF.EmitLValueForField( 4835 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4836 llvm::Value *NumDeps = 4837 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4838 4839 // memcopy dependency data. 4840 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4841 ElSize, 4842 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4843 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4844 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4845 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4846 4847 // Increase pos. 4848 // pos += size; 4849 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4850 CGF.EmitStoreOfScalar(Add, PosLVal); 4851 } 4852 } 4853 } 4854 4855 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4856 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4857 SourceLocation Loc) { 4858 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4859 return D.DepExprs.empty(); 4860 })) 4861 return std::make_pair(nullptr, Address::invalid()); 4862 // Process list of dependencies. 4863 ASTContext &C = CGM.getContext(); 4864 Address DependenciesArray = Address::invalid(); 4865 llvm::Value *NumOfElements = nullptr; 4866 unsigned NumDependencies = std::accumulate( 4867 Dependencies.begin(), Dependencies.end(), 0, 4868 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4869 return D.DepKind == OMPC_DEPEND_depobj 4870 ? V 4871 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4872 }); 4873 QualType FlagsTy; 4874 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4875 bool HasDepobjDeps = false; 4876 bool HasRegularWithIterators = false; 4877 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4878 llvm::Value *NumOfRegularWithIterators = 4879 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4880 // Calculate number of depobj dependecies and regular deps with the iterators. 4881 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4882 if (D.DepKind == OMPC_DEPEND_depobj) { 4883 SmallVector<llvm::Value *, 4> Sizes = 4884 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4885 for (llvm::Value *Size : Sizes) { 4886 NumOfDepobjElements = 4887 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4888 } 4889 HasDepobjDeps = true; 4890 continue; 4891 } 4892 // Include number of iterations, if any. 4893 4894 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4895 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4896 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4897 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4898 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4899 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4900 NumOfRegularWithIterators = 4901 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4902 } 4903 HasRegularWithIterators = true; 4904 continue; 4905 } 4906 } 4907 4908 QualType KmpDependInfoArrayTy; 4909 if (HasDepobjDeps || HasRegularWithIterators) { 4910 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4911 /*isSigned=*/false); 4912 if (HasDepobjDeps) { 4913 NumOfElements = 4914 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4915 } 4916 if (HasRegularWithIterators) { 4917 NumOfElements = 4918 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4919 } 4920 auto *OVE = new (C) OpaqueValueExpr( 4921 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4922 VK_PRValue); 4923 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4924 RValue::get(NumOfElements)); 4925 KmpDependInfoArrayTy = 4926 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4927 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4928 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4929 // Properly emit variable-sized array. 4930 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4931 ImplicitParamDecl::Other); 4932 CGF.EmitVarDecl(*PD); 4933 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4934 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4935 /*isSigned=*/false); 4936 } else { 4937 KmpDependInfoArrayTy = C.getConstantArrayType( 4938 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4939 ArrayType::Normal, /*IndexTypeQuals=*/0); 4940 DependenciesArray = 4941 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4942 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4943 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4944 /*isSigned=*/false); 4945 } 4946 unsigned Pos = 0; 4947 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4948 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4949 Dependencies[I].IteratorExpr) 4950 continue; 4951 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4952 DependenciesArray); 4953 } 4954 // Copy regular dependecies with iterators. 
4955 LValue PosLVal = CGF.MakeAddrLValue( 4956 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4957 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4958 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4959 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4960 !Dependencies[I].IteratorExpr) 4961 continue; 4962 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4963 DependenciesArray); 4964 } 4965 // Copy final depobj arrays without iterators. 4966 if (HasDepobjDeps) { 4967 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4968 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4969 continue; 4970 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4971 DependenciesArray); 4972 } 4973 } 4974 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4975 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4976 return std::make_pair(NumOfElements, DependenciesArray); 4977 } 4978 4979 Address CGOpenMPRuntime::emitDepobjDependClause( 4980 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4981 SourceLocation Loc) { 4982 if (Dependencies.DepExprs.empty()) 4983 return Address::invalid(); 4984 // Process list of dependencies. 4985 ASTContext &C = CGM.getContext(); 4986 Address DependenciesArray = Address::invalid(); 4987 unsigned NumDependencies = Dependencies.DepExprs.size(); 4988 QualType FlagsTy; 4989 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4990 RecordDecl *KmpDependInfoRD = 4991 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4992 4993 llvm::Value *Size; 4994 // Define type kmp_depend_info[<Dependencies.size()>]; 4995 // For depobj reserve one extra element to store the number of elements. 4996 // It is required to handle depobj(x) update(in) construct. 
4997 // kmp_depend_info[<Dependencies.size()>] deps; 4998 llvm::Value *NumDepsVal; 4999 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5000 if (const auto *IE = 5001 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5002 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5003 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5004 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5005 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5006 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5007 } 5008 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5009 NumDepsVal); 5010 CharUnits SizeInBytes = 5011 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5012 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5013 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5014 NumDepsVal = 5015 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5016 } else { 5017 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5018 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5019 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5020 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5021 Size = CGM.getSize(Sz.alignTo(Align)); 5022 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5023 } 5024 // Need to allocate on the dynamic memory. 5025 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5026 // Use default allocator. 
5027 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5028 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5029 5030 llvm::Value *Addr = 5031 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5032 CGM.getModule(), OMPRTL___kmpc_alloc), 5033 Args, ".dep.arr.addr"); 5034 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5035 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5036 DependenciesArray = Address::deprecated(Addr, Align); 5037 // Write number of elements in the first element of array for depobj. 5038 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5039 // deps[i].base_addr = NumDependencies; 5040 LValue BaseAddrLVal = CGF.EmitLValueForField( 5041 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5042 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5043 llvm::PointerUnion<unsigned *, LValue *> Pos; 5044 unsigned Idx = 1; 5045 LValue PosLVal; 5046 if (Dependencies.IteratorExpr) { 5047 PosLVal = CGF.MakeAddrLValue( 5048 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5049 C.getSizeType()); 5050 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5051 /*IsInit=*/true); 5052 Pos = &PosLVal; 5053 } else { 5054 Pos = &Idx; 5055 } 5056 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5057 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5058 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 5059 CGF.Int8Ty); 5060 return DependenciesArray; 5061 } 5062 5063 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5064 SourceLocation Loc) { 5065 ASTContext &C = CGM.getContext(); 5066 QualType FlagsTy; 5067 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5068 LValue Base = CGF.EmitLoadOfPointerLValue( 5069 DepobjLVal.getAddress(CGF), 5070 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5071 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5072 
Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5073 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 5074 CGF.ConvertTypeForMem(KmpDependInfoTy)); 5075 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5076 Addr.getElementType(), Addr.getPointer(), 5077 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5078 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5079 CGF.VoidPtrTy); 5080 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5081 // Use default allocator. 5082 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5083 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5084 5085 // _kmpc_free(gtid, addr, nullptr); 5086 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5087 CGM.getModule(), OMPRTL___kmpc_free), 5088 Args); 5089 } 5090 5091 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5092 OpenMPDependClauseKind NewDepKind, 5093 SourceLocation Loc) { 5094 ASTContext &C = CGM.getContext(); 5095 QualType FlagsTy; 5096 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5097 RecordDecl *KmpDependInfoRD = 5098 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5099 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5100 llvm::Value *NumDeps; 5101 LValue Base; 5102 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5103 5104 Address Begin = Base.getAddress(CGF); 5105 // Cast from pointer to array type to pointer to single element. 5106 llvm::Value *End = CGF.Builder.CreateGEP( 5107 Begin.getElementType(), Begin.getPointer(), NumDeps); 5108 // The basic structure here is a while-do loop. 
5109 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5110 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5111 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5112 CGF.EmitBlock(BodyBB); 5113 llvm::PHINode *ElementPHI = 5114 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5115 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5116 Begin = Begin.withPointer(ElementPHI); 5117 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5118 Base.getTBAAInfo()); 5119 // deps[i].flags = NewDepKind; 5120 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5121 LValue FlagsLVal = CGF.EmitLValueForField( 5122 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5123 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5124 FlagsLVal); 5125 5126 // Shift the address forward by one element. 5127 Address ElementNext = 5128 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5129 ElementPHI->addIncoming(ElementNext.getPointer(), 5130 CGF.Builder.GetInsertBlock()); 5131 llvm::Value *IsEmpty = 5132 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5133 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5134 // Done. 
5135 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5136 } 5137 5138 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5139 const OMPExecutableDirective &D, 5140 llvm::Function *TaskFunction, 5141 QualType SharedsTy, Address Shareds, 5142 const Expr *IfCond, 5143 const OMPTaskDataTy &Data) { 5144 if (!CGF.HaveInsertPoint()) 5145 return; 5146 5147 TaskResultTy Result = 5148 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5149 llvm::Value *NewTask = Result.NewTask; 5150 llvm::Function *TaskEntry = Result.TaskEntry; 5151 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5152 LValue TDBase = Result.TDBase; 5153 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5154 // Process list of dependences. 5155 Address DependenciesArray = Address::invalid(); 5156 llvm::Value *NumOfElements; 5157 std::tie(NumOfElements, DependenciesArray) = 5158 emitDependClause(CGF, Data.Dependences, Loc); 5159 5160 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5161 // libcall. 
5162 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5163 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5164 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5165 // list is not empty 5166 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5167 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5168 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5169 llvm::Value *DepTaskArgs[7]; 5170 if (!Data.Dependences.empty()) { 5171 DepTaskArgs[0] = UpLoc; 5172 DepTaskArgs[1] = ThreadID; 5173 DepTaskArgs[2] = NewTask; 5174 DepTaskArgs[3] = NumOfElements; 5175 DepTaskArgs[4] = DependenciesArray.getPointer(); 5176 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5177 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5178 } 5179 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5180 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5181 if (!Data.Tied) { 5182 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5183 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5185 } 5186 if (!Data.Dependences.empty()) { 5187 CGF.EmitRuntimeCall( 5188 OMPBuilder.getOrCreateRuntimeFunction( 5189 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5190 DepTaskArgs); 5191 } else { 5192 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5193 CGM.getModule(), OMPRTL___kmpc_omp_task), 5194 TaskArgs); 5195 } 5196 // Check if parent region is untied and build return for untied task; 5197 if (auto *Region = 5198 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5199 Region->emitUntiedSwitch(CGF); 5200 }; 5201 5202 llvm::Value *DepWaitTaskArgs[6]; 5203 if (!Data.Dependences.empty()) { 5204 DepWaitTaskArgs[0] = UpLoc; 5205 DepWaitTaskArgs[1] = ThreadID; 5206 DepWaitTaskArgs[2] = NumOfElements; 5207 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5208 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5209 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5210 } 5211 auto &M = CGM.getModule(); 5212 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5213 TaskEntry, &Data, &DepWaitTaskArgs, 5214 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5215 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5216 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5217 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5218 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5219 // is specified. 5220 if (!Data.Dependences.empty()) 5221 CGF.EmitRuntimeCall( 5222 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5223 DepWaitTaskArgs); 5224 // Call proxy_task_entry(gtid, new_task); 5225 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5226 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5227 Action.Enter(CGF); 5228 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5229 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5230 OutlinedFnArgs); 5231 }; 5232 5233 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5234 // kmp_task_t *new_task); 5235 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5236 // kmp_task_t *new_task); 5237 RegionCodeGenTy RCG(CodeGen); 5238 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5239 M, OMPRTL___kmpc_omp_task_begin_if0), 5240 TaskArgs, 5241 OMPBuilder.getOrCreateRuntimeFunction( 5242 M, OMPRTL___kmpc_omp_task_complete_if0), 5243 TaskArgs); 5244 RCG.setAction(Action); 5245 RCG(CGF); 5246 }; 5247 5248 if (IfCond) { 5249 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5250 } else { 5251 RegionCodeGenTy ThenRCG(ThenCodeGen); 5252 ThenRCG(CGF); 5253 } 5254 } 5255 5256 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5257 const OMPLoopDirective &D, 5258 llvm::Function *TaskFunction, 5259 
QualType SharedsTy, Address Shareds, 5260 const Expr *IfCond, 5261 const OMPTaskDataTy &Data) { 5262 if (!CGF.HaveInsertPoint()) 5263 return; 5264 TaskResultTy Result = 5265 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5266 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5267 // libcall. 5268 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5269 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5270 // sched, kmp_uint64 grainsize, void *task_dup); 5271 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5272 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5273 llvm::Value *IfVal; 5274 if (IfCond) { 5275 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5276 /*isSigned=*/true); 5277 } else { 5278 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5279 } 5280 5281 LValue LBLVal = CGF.EmitLValueForField( 5282 Result.TDBase, 5283 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5284 const auto *LBVar = 5285 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5286 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5287 LBLVal.getQuals(), 5288 /*IsInitializer=*/true); 5289 LValue UBLVal = CGF.EmitLValueForField( 5290 Result.TDBase, 5291 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5292 const auto *UBVar = 5293 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5294 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5295 UBLVal.getQuals(), 5296 /*IsInitializer=*/true); 5297 LValue StLVal = CGF.EmitLValueForField( 5298 Result.TDBase, 5299 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5300 const auto *StVar = 5301 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5302 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5303 StLVal.getQuals(), 5304 /*IsInitializer=*/true); 5305 // 
Store reductions address. 5306 LValue RedLVal = CGF.EmitLValueForField( 5307 Result.TDBase, 5308 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5309 if (Data.Reductions) { 5310 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5311 } else { 5312 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5313 CGF.getContext().VoidPtrTy); 5314 } 5315 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5316 llvm::Value *TaskArgs[] = { 5317 UpLoc, 5318 ThreadID, 5319 Result.NewTask, 5320 IfVal, 5321 LBLVal.getPointer(CGF), 5322 UBLVal.getPointer(CGF), 5323 CGF.EmitLoadOfScalar(StLVal, Loc), 5324 llvm::ConstantInt::getSigned( 5325 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5326 llvm::ConstantInt::getSigned( 5327 CGF.IntTy, Data.Schedule.getPointer() 5328 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5329 : NoSchedule), 5330 Data.Schedule.getPointer() 5331 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5332 /*isSigned=*/false) 5333 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5334 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5335 Result.TaskDupFn, CGF.VoidPtrTy) 5336 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5337 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5338 CGM.getModule(), OMPRTL___kmpc_taskloop), 5339 TaskArgs); 5340 } 5341 5342 /// Emit reduction operation for each element of array (required for 5343 /// array sections) LHS op = RHS. 5344 /// \param Type Type of array. 5345 /// \param LHSVar Variable on the left side of the reduction operation 5346 /// (references element of array in original variable). 5347 /// \param RHSVar Variable on the right side of the reduction operation 5348 /// (references element of array in original variable). 5349 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5350 /// RHSVar. 
5351 static void EmitOMPAggregateReduction( 5352 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5353 const VarDecl *RHSVar, 5354 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5355 const Expr *, const Expr *)> &RedOpGen, 5356 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5357 const Expr *UpExpr = nullptr) { 5358 // Perform element-by-element initialization. 5359 QualType ElementTy; 5360 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5361 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5362 5363 // Drill down to the base element type on both arrays. 5364 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5365 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5366 5367 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5368 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5369 // Cast from pointer to array type to pointer to single element. 5370 llvm::Value *LHSEnd = 5371 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5372 // The basic structure here is a while-do loop. 5373 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5374 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5375 llvm::Value *IsEmpty = 5376 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5377 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5378 5379 // Enter the loop body, making that address the current address. 
5380 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5381 CGF.EmitBlock(BodyBB); 5382 5383 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5384 5385 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5386 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5387 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5388 Address RHSElementCurrent = Address::deprecated( 5389 RHSElementPHI, 5390 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5391 5392 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5393 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5394 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5395 Address LHSElementCurrent = Address::deprecated( 5396 LHSElementPHI, 5397 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5398 5399 // Emit copy. 5400 CodeGenFunction::OMPPrivateScope Scope(CGF); 5401 Scope.addPrivate(LHSVar, LHSElementCurrent); 5402 Scope.addPrivate(RHSVar, RHSElementCurrent); 5403 Scope.Privatize(); 5404 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5405 Scope.ForceCleanup(); 5406 5407 // Shift the address forward by one element. 5408 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5409 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5410 "omp.arraycpy.dest.element"); 5411 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5412 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5413 "omp.arraycpy.src.element"); 5414 // Check whether we've reached the end. 5415 llvm::Value *Done = 5416 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5417 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5418 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5419 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5420 5421 // Done. 5422 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5423 } 5424 5425 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5426 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5427 /// UDR combiner function. 5428 static void emitReductionCombiner(CodeGenFunction &CGF, 5429 const Expr *ReductionOp) { 5430 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5431 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5432 if (const auto *DRE = 5433 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5434 if (const auto *DRD = 5435 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5436 std::pair<llvm::Function *, llvm::Function *> Reduction = 5437 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5438 RValue Func = RValue::get(Reduction.first); 5439 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5440 CGF.EmitIgnoredExpr(ReductionOp); 5441 return; 5442 } 5443 CGF.EmitIgnoredExpr(ReductionOp); 5444 } 5445 5446 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5447 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5448 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5449 ArrayRef<const Expr *> ReductionOps) { 5450 ASTContext &C = CGM.getContext(); 5451 5452 // void reduction_func(void *LHSArg, void *RHSArg); 5453 FunctionArgList Args; 5454 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5455 ImplicitParamDecl::Other); 5456 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5457 ImplicitParamDecl::Other); 5458 Args.push_back(&LHSArg); 5459 Args.push_back(&RHSArg); 5460 const auto &CGFI = 5461 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5462 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5463 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5464 llvm::GlobalValue::InternalLinkage, Name, 5465 &CGM.getModule()); 5466 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5467 Fn->setDoesNotRecurse(); 
5468 CodeGenFunction CGF(CGM); 5469 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5470 5471 // Dst = (void*[n])(LHSArg); 5472 // Src = (void*[n])(RHSArg); 5473 Address LHS = Address::deprecated( 5474 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5475 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), 5476 CGF.getPointerAlign()); 5477 Address RHS = Address::deprecated( 5478 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5479 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), 5480 CGF.getPointerAlign()); 5481 5482 // ... 5483 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5484 // ... 5485 CodeGenFunction::OMPPrivateScope Scope(CGF); 5486 const auto *IPriv = Privates.begin(); 5487 unsigned Idx = 0; 5488 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5489 const auto *RHSVar = 5490 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5491 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 5492 const auto *LHSVar = 5493 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5494 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 5495 QualType PrivTy = (*IPriv)->getType(); 5496 if (PrivTy->isVariablyModifiedType()) { 5497 // Get array size and emit VLA type. 
5498 ++Idx; 5499 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5500 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5501 const VariableArrayType *VLA = 5502 CGF.getContext().getAsVariableArrayType(PrivTy); 5503 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5504 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5505 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5506 CGF.EmitVariablyModifiedType(PrivTy); 5507 } 5508 } 5509 Scope.Privatize(); 5510 IPriv = Privates.begin(); 5511 const auto *ILHS = LHSExprs.begin(); 5512 const auto *IRHS = RHSExprs.begin(); 5513 for (const Expr *E : ReductionOps) { 5514 if ((*IPriv)->getType()->isArrayType()) { 5515 // Emit reduction for array section. 5516 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5517 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5518 EmitOMPAggregateReduction( 5519 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5520 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5521 emitReductionCombiner(CGF, E); 5522 }); 5523 } else { 5524 // Emit reduction for array subscript or single variable. 5525 emitReductionCombiner(CGF, E); 5526 } 5527 ++IPriv; 5528 ++ILHS; 5529 ++IRHS; 5530 } 5531 Scope.ForceCleanup(); 5532 CGF.FinishFunction(); 5533 return Fn; 5534 } 5535 5536 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5537 const Expr *ReductionOp, 5538 const Expr *PrivateRef, 5539 const DeclRefExpr *LHS, 5540 const DeclRefExpr *RHS) { 5541 if (PrivateRef->getType()->isArrayType()) { 5542 // Emit reduction for array section. 
5543 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5544 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5545 EmitOMPAggregateReduction( 5546 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5547 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5548 emitReductionCombiner(CGF, ReductionOp); 5549 }); 5550 } else { 5551 // Emit reduction for array subscript or single variable. 5552 emitReductionCombiner(CGF, ReductionOp); 5553 } 5554 } 5555 5556 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5557 ArrayRef<const Expr *> Privates, 5558 ArrayRef<const Expr *> LHSExprs, 5559 ArrayRef<const Expr *> RHSExprs, 5560 ArrayRef<const Expr *> ReductionOps, 5561 ReductionOptionsTy Options) { 5562 if (!CGF.HaveInsertPoint()) 5563 return; 5564 5565 bool WithNowait = Options.WithNowait; 5566 bool SimpleReduction = Options.SimpleReduction; 5567 5568 // Next code should be emitted for reduction: 5569 // 5570 // static kmp_critical_name lock = { 0 }; 5571 // 5572 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5573 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5574 // ... 5575 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5576 // *(Type<n>-1*)rhs[<n>-1]); 5577 // } 5578 // 5579 // ... 5580 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5581 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5582 // RedList, reduce_func, &<lock>)) { 5583 // case 1: 5584 // ... 5585 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5586 // ... 5587 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5588 // break; 5589 // case 2: 5590 // ... 5591 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5592 // ... 5593 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5594 // break; 5595 // default:; 5596 // } 5597 // 5598 // if SimpleReduction is true, only the next code is generated: 5599 // ... 
5600 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5601 // ... 5602 5603 ASTContext &C = CGM.getContext(); 5604 5605 if (SimpleReduction) { 5606 CodeGenFunction::RunCleanupsScope Scope(CGF); 5607 const auto *IPriv = Privates.begin(); 5608 const auto *ILHS = LHSExprs.begin(); 5609 const auto *IRHS = RHSExprs.begin(); 5610 for (const Expr *E : ReductionOps) { 5611 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5612 cast<DeclRefExpr>(*IRHS)); 5613 ++IPriv; 5614 ++ILHS; 5615 ++IRHS; 5616 } 5617 return; 5618 } 5619 5620 // 1. Build a list of reduction variables. 5621 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5622 auto Size = RHSExprs.size(); 5623 for (const Expr *E : Privates) { 5624 if (E->getType()->isVariablyModifiedType()) 5625 // Reserve place for array size. 5626 ++Size; 5627 } 5628 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5629 QualType ReductionArrayTy = 5630 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5631 /*IndexTypeQuals=*/0); 5632 Address ReductionList = 5633 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5634 const auto *IPriv = Privates.begin(); 5635 unsigned Idx = 0; 5636 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5637 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5638 CGF.Builder.CreateStore( 5639 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5640 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5641 Elem); 5642 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5643 // Store array size. 
5644 ++Idx; 5645 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5646 llvm::Value *Size = CGF.Builder.CreateIntCast( 5647 CGF.getVLASize( 5648 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5649 .NumElts, 5650 CGF.SizeTy, /*isSigned=*/false); 5651 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5652 Elem); 5653 } 5654 } 5655 5656 // 2. Emit reduce_func(). 5657 llvm::Function *ReductionFn = emitReductionFunction( 5658 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5659 LHSExprs, RHSExprs, ReductionOps); 5660 5661 // 3. Create static kmp_critical_name lock = { 0 }; 5662 std::string Name = getName({"reduction"}); 5663 llvm::Value *Lock = getCriticalRegionLock(Name); 5664 5665 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5666 // RedList, reduce_func, &<lock>); 5667 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5668 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5669 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5670 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5671 ReductionList.getPointer(), CGF.VoidPtrTy); 5672 llvm::Value *Args[] = { 5673 IdentTLoc, // ident_t *<loc> 5674 ThreadId, // i32 <gtid> 5675 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5676 ReductionArrayTySize, // size_type sizeof(RedList) 5677 RL, // void *RedList 5678 ReductionFn, // void (*) (void *, void *) <reduce_func> 5679 Lock // kmp_critical_name *&<lock> 5680 }; 5681 llvm::Value *Res = CGF.EmitRuntimeCall( 5682 OMPBuilder.getOrCreateRuntimeFunction( 5683 CGM.getModule(), 5684 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5685 Args); 5686 5687 // 5. Build switch(res) 5688 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5689 llvm::SwitchInst *SwInst = 5690 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5691 5692 // 6. Build case 1: 5693 // ... 
5694 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5695 // ... 5696 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5697 // break; 5698 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5699 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5700 CGF.EmitBlock(Case1BB); 5701 5702 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5703 llvm::Value *EndArgs[] = { 5704 IdentTLoc, // ident_t *<loc> 5705 ThreadId, // i32 <gtid> 5706 Lock // kmp_critical_name *&<lock> 5707 }; 5708 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5709 CodeGenFunction &CGF, PrePostActionTy &Action) { 5710 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5711 const auto *IPriv = Privates.begin(); 5712 const auto *ILHS = LHSExprs.begin(); 5713 const auto *IRHS = RHSExprs.begin(); 5714 for (const Expr *E : ReductionOps) { 5715 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5716 cast<DeclRefExpr>(*IRHS)); 5717 ++IPriv; 5718 ++ILHS; 5719 ++IRHS; 5720 } 5721 }; 5722 RegionCodeGenTy RCG(CodeGen); 5723 CommonActionTy Action( 5724 nullptr, llvm::None, 5725 OMPBuilder.getOrCreateRuntimeFunction( 5726 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5727 : OMPRTL___kmpc_end_reduce), 5728 EndArgs); 5729 RCG.setAction(Action); 5730 RCG(CGF); 5731 5732 CGF.EmitBranch(DefaultBB); 5733 5734 // 7. Build case 2: 5735 // ... 5736 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5737 // ... 
5738 // break; 5739 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5740 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5741 CGF.EmitBlock(Case2BB); 5742 5743 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5744 CodeGenFunction &CGF, PrePostActionTy &Action) { 5745 const auto *ILHS = LHSExprs.begin(); 5746 const auto *IRHS = RHSExprs.begin(); 5747 const auto *IPriv = Privates.begin(); 5748 for (const Expr *E : ReductionOps) { 5749 const Expr *XExpr = nullptr; 5750 const Expr *EExpr = nullptr; 5751 const Expr *UpExpr = nullptr; 5752 BinaryOperatorKind BO = BO_Comma; 5753 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5754 if (BO->getOpcode() == BO_Assign) { 5755 XExpr = BO->getLHS(); 5756 UpExpr = BO->getRHS(); 5757 } 5758 } 5759 // Try to emit update expression as a simple atomic. 5760 const Expr *RHSExpr = UpExpr; 5761 if (RHSExpr) { 5762 // Analyze RHS part of the whole expression. 5763 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5764 RHSExpr->IgnoreParenImpCasts())) { 5765 // If this is a conditional operator, analyze its condition for 5766 // min/max reduction operator. 
5767 RHSExpr = ACO->getCond(); 5768 } 5769 if (const auto *BORHS = 5770 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5771 EExpr = BORHS->getRHS(); 5772 BO = BORHS->getOpcode(); 5773 } 5774 } 5775 if (XExpr) { 5776 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5777 auto &&AtomicRedGen = [BO, VD, 5778 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5779 const Expr *EExpr, const Expr *UpExpr) { 5780 LValue X = CGF.EmitLValue(XExpr); 5781 RValue E; 5782 if (EExpr) 5783 E = CGF.EmitAnyExpr(EExpr); 5784 CGF.EmitOMPAtomicSimpleUpdateExpr( 5785 X, E, BO, /*IsXLHSInRHSPart=*/true, 5786 llvm::AtomicOrdering::Monotonic, Loc, 5787 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5788 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5789 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5790 CGF.emitOMPSimpleStore( 5791 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5792 VD->getType().getNonReferenceType(), Loc); 5793 PrivateScope.addPrivate(VD, LHSTemp); 5794 (void)PrivateScope.Privatize(); 5795 return CGF.EmitAnyExpr(UpExpr); 5796 }); 5797 }; 5798 if ((*IPriv)->getType()->isArrayType()) { 5799 // Emit atomic reduction for array section. 5800 const auto *RHSVar = 5801 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5802 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5803 AtomicRedGen, XExpr, EExpr, UpExpr); 5804 } else { 5805 // Emit atomic reduction for array subscript or single variable. 5806 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5807 } 5808 } else { 5809 // Emit as a critical region. 
5810 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5811 const Expr *, const Expr *) { 5812 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5813 std::string Name = RT.getName({"atomic_reduction"}); 5814 RT.emitCriticalRegion( 5815 CGF, Name, 5816 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5817 Action.Enter(CGF); 5818 emitReductionCombiner(CGF, E); 5819 }, 5820 Loc); 5821 }; 5822 if ((*IPriv)->getType()->isArrayType()) { 5823 const auto *LHSVar = 5824 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5825 const auto *RHSVar = 5826 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5827 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5828 CritRedGen); 5829 } else { 5830 CritRedGen(CGF, nullptr, nullptr, nullptr); 5831 } 5832 } 5833 ++ILHS; 5834 ++IRHS; 5835 ++IPriv; 5836 } 5837 }; 5838 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5839 if (!WithNowait) { 5840 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5841 llvm::Value *EndArgs[] = { 5842 IdentTLoc, // ident_t *<loc> 5843 ThreadId, // i32 <gtid> 5844 Lock // kmp_critical_name *&<lock> 5845 }; 5846 CommonActionTy Action(nullptr, llvm::None, 5847 OMPBuilder.getOrCreateRuntimeFunction( 5848 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5849 EndArgs); 5850 AtomicRCG.setAction(Action); 5851 AtomicRCG(CGF); 5852 } else { 5853 AtomicRCG(CGF); 5854 } 5855 5856 CGF.EmitBranch(DefaultBB); 5857 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5858 } 5859 5860 /// Generates unique name for artificial threadprivate variables. 5861 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5862 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5863 const Expr *Ref) { 5864 SmallString<256> Buffer; 5865 llvm::raw_svector_ostream Out(Buffer); 5866 const clang::DeclRefExpr *DE; 5867 const VarDecl *D = ::getBaseDecl(Ref, DE); 5868 if (!D) 5869 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5870 D = D->getCanonicalDecl(); 5871 std::string Name = CGM.getOpenMPRuntime().getName( 5872 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5873 Out << Prefix << Name << "_" 5874 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5875 return std::string(Out.str()); 5876 } 5877 5878 /// Emits reduction initializer function: 5879 /// \code 5880 /// void @.red_init(void* %arg, void* %orig) { 5881 /// %0 = bitcast void* %arg to <type>* 5882 /// store <type> <init>, <type>* %0 5883 /// ret void 5884 /// } 5885 /// \endcode 5886 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5887 SourceLocation Loc, 5888 ReductionCodeGen &RCG, unsigned N) { 5889 ASTContext &C = CGM.getContext(); 5890 QualType VoidPtrTy = C.VoidPtrTy; 5891 VoidPtrTy.addRestrict(); 5892 FunctionArgList Args; 5893 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5894 ImplicitParamDecl::Other); 5895 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5896 ImplicitParamDecl::Other); 5897 Args.emplace_back(&Param); 5898 Args.emplace_back(&ParamOrig); 5899 const auto &FnInfo = 5900 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5901 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5902 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5903 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5904 Name, &CGM.getModule()); 5905 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5906 Fn->setDoesNotRecurse(); 5907 CodeGenFunction CGF(CGM); 5908 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5909 Address PrivateAddr = CGF.EmitLoadOfPointer( 5910 CGF.GetAddrOfLocalVar(&Param), 5911 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5912 llvm::Value *Size = nullptr; 5913 // If the size of the reduction item is non-constant, load it from global 5914 // threadprivate variable. 5915 if (RCG.getSizes(N).second) { 5916 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5917 CGF, CGM.getContext().getSizeType(), 5918 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5919 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5920 CGM.getContext().getSizeType(), Loc); 5921 } 5922 RCG.emitAggregateType(CGF, N, Size); 5923 Address OrigAddr = Address::invalid(); 5924 // If initializer uses initializer from declare reduction construct, emit a 5925 // pointer to the address of the original reduction item (reuired by reduction 5926 // initializer) 5927 if (RCG.usesReductionInitializer(N)) { 5928 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5929 OrigAddr = CGF.EmitLoadOfPointer( 5930 SharedAddr, 5931 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5932 } 5933 // Emit the initializer: 5934 // %0 = bitcast void* %arg to <type>* 5935 // store <type> <init>, <type>* %0 5936 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, 5937 [](CodeGenFunction &) { return false; }); 5938 CGF.FinishFunction(); 5939 return Fn; 5940 } 5941 5942 /// Emits reduction combiner function: 5943 /// \code 5944 /// void @.red_comb(void* %arg0, void* %arg1) { 5945 /// %lhs = bitcast void* %arg0 to <type>* 5946 /// %rhs = bitcast void* %arg1 to <type>* 5947 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5948 /// store <type> %2, <type>* %lhs 5949 /// ret void 5950 /// } 5951 /// \endcode 5952 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5953 SourceLocation Loc, 5954 ReductionCodeGen &RCG, unsigned N, 5955 const Expr *ReductionOp, 5956 
const Expr *LHS, const Expr *RHS, 5957 const Expr *PrivateRef) { 5958 ASTContext &C = CGM.getContext(); 5959 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5960 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5961 FunctionArgList Args; 5962 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5963 C.VoidPtrTy, ImplicitParamDecl::Other); 5964 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5965 ImplicitParamDecl::Other); 5966 Args.emplace_back(&ParamInOut); 5967 Args.emplace_back(&ParamIn); 5968 const auto &FnInfo = 5969 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5970 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5971 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5972 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5973 Name, &CGM.getModule()); 5974 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5975 Fn->setDoesNotRecurse(); 5976 CodeGenFunction CGF(CGM); 5977 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5978 llvm::Value *Size = nullptr; 5979 // If the size of the reduction item is non-constant, load it from global 5980 // threadprivate variable. 5981 if (RCG.getSizes(N).second) { 5982 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5983 CGF, CGM.getContext().getSizeType(), 5984 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5985 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5986 CGM.getContext().getSizeType(), Loc); 5987 } 5988 RCG.emitAggregateType(CGF, N, Size); 5989 // Remap lhs and rhs variables to the addresses of the function arguments. 5990 // %lhs = bitcast void* %arg0 to <type>* 5991 // %rhs = bitcast void* %arg1 to <type>* 5992 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5993 PrivateScope.addPrivate( 5994 LHSVD, 5995 // Pull out the pointer to the variable. 
5996 CGF.EmitLoadOfPointer( 5997 CGF.Builder.CreateElementBitCast( 5998 CGF.GetAddrOfLocalVar(&ParamInOut), 5999 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 6000 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 6001 PrivateScope.addPrivate( 6002 RHSVD, 6003 // Pull out the pointer to the variable. 6004 CGF.EmitLoadOfPointer( 6005 CGF.Builder.CreateElementBitCast( 6006 CGF.GetAddrOfLocalVar(&ParamIn), 6007 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 6008 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 6009 PrivateScope.Privatize(); 6010 // Emit the combiner body: 6011 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6012 // store <type> %2, <type>* %lhs 6013 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6014 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6015 cast<DeclRefExpr>(RHS)); 6016 CGF.FinishFunction(); 6017 return Fn; 6018 } 6019 6020 /// Emits reduction finalizer function: 6021 /// \code 6022 /// void @.red_fini(void* %arg) { 6023 /// %0 = bitcast void* %arg to <type>* 6024 /// <destroy>(<type>* %0) 6025 /// ret void 6026 /// } 6027 /// \endcode 6028 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6029 SourceLocation Loc, 6030 ReductionCodeGen &RCG, unsigned N) { 6031 if (!RCG.needCleanups(N)) 6032 return nullptr; 6033 ASTContext &C = CGM.getContext(); 6034 FunctionArgList Args; 6035 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6036 ImplicitParamDecl::Other); 6037 Args.emplace_back(&Param); 6038 const auto &FnInfo = 6039 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6040 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6041 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6042 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6043 Name, &CGM.getModule()); 6044 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6045 Fn->setDoesNotRecurse(); 6046 
CodeGenFunction CGF(CGM); 6047 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6048 Address PrivateAddr = CGF.EmitLoadOfPointer( 6049 CGF.GetAddrOfLocalVar(&Param), 6050 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6051 llvm::Value *Size = nullptr; 6052 // If the size of the reduction item is non-constant, load it from global 6053 // threadprivate variable. 6054 if (RCG.getSizes(N).second) { 6055 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6056 CGF, CGM.getContext().getSizeType(), 6057 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6058 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6059 CGM.getContext().getSizeType(), Loc); 6060 } 6061 RCG.emitAggregateType(CGF, N, Size); 6062 // Emit the finalizer body: 6063 // <destroy>(<type>* %0) 6064 RCG.emitCleanups(CGF, N, PrivateAddr); 6065 CGF.FinishFunction(Loc); 6066 return Fn; 6067 } 6068 6069 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6070 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6071 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6072 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6073 return nullptr; 6074 6075 // Build typedef struct: 6076 // kmp_taskred_input { 6077 // void *reduce_shar; // shared reduction item 6078 // void *reduce_orig; // original reduction item used for initialization 6079 // size_t reduce_size; // size of data item 6080 // void *reduce_init; // data initialization routine 6081 // void *reduce_fini; // data finalization routine 6082 // void *reduce_comb; // data combiner routine 6083 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6084 // } kmp_taskred_input_t; 6085 ASTContext &C = CGM.getContext(); 6086 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6087 RD->startDefinition(); 6088 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6089 const FieldDecl *OrigFD = 
addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6090 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6091 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6092 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6093 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6094 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6095 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6096 RD->completeDefinition(); 6097 QualType RDType = C.getRecordType(RD); 6098 unsigned Size = Data.ReductionVars.size(); 6099 llvm::APInt ArraySize(/*numBits=*/64, Size); 6100 QualType ArrayRDType = C.getConstantArrayType( 6101 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6102 // kmp_task_red_input_t .rd_input.[Size]; 6103 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6104 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6105 Data.ReductionCopies, Data.ReductionOps); 6106 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6107 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6108 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6109 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6110 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6111 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6112 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6113 ".rd_input.gep."); 6114 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6115 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6116 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6117 RCG.emitSharedOrigLValue(CGF, Cnt); 6118 llvm::Value *CastedShared = 6119 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6120 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6121 // ElemLVal.reduce_orig = &Origs[Cnt]; 6122 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6123 llvm::Value *CastedOrig = 6124 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6125 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6126 RCG.emitAggregateType(CGF, Cnt); 6127 llvm::Value *SizeValInChars; 6128 llvm::Value *SizeVal; 6129 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6130 // We use delayed creation/initialization for VLAs and array sections. It is 6131 // required because runtime does not provide the way to pass the sizes of 6132 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6133 // threadprivate global variables are used to store these values and use 6134 // them in the functions. 6135 bool DelayedCreation = !!SizeVal; 6136 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6137 /*isSigned=*/false); 6138 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6139 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6140 // ElemLVal.reduce_init = init; 6141 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6142 llvm::Value *InitAddr = 6143 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6144 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6145 // ElemLVal.reduce_fini = fini; 6146 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6147 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6148 llvm::Value *FiniAddr = Fini 6149 ? 
CGF.EmitCastToVoidPtr(Fini) 6150 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6151 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6152 // ElemLVal.reduce_comb = comb; 6153 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6154 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6155 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6156 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6157 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6158 // ElemLVal.flags = 0; 6159 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6160 if (DelayedCreation) { 6161 CGF.EmitStoreOfScalar( 6162 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6163 FlagsLVal); 6164 } else 6165 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6166 FlagsLVal.getType()); 6167 } 6168 if (Data.IsReductionWithTaskMod) { 6169 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6170 // is_ws, int num, void *data); 6171 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6172 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6173 CGM.IntTy, /*isSigned=*/true); 6174 llvm::Value *Args[] = { 6175 IdentTLoc, GTid, 6176 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6177 /*isSigned=*/true), 6178 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6179 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6180 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6181 return CGF.EmitRuntimeCall( 6182 OMPBuilder.getOrCreateRuntimeFunction( 6183 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6184 Args); 6185 } 6186 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6187 llvm::Value *Args[] = { 6188 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6189 /*isSigned=*/true), 6190 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6191 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6192 CGM.VoidPtrTy)}; 6193 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6194 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6195 Args); 6196 } 6197 6198 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6199 SourceLocation Loc, 6200 bool IsWorksharingReduction) { 6201 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6202 // is_ws, int num, void *data); 6203 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6204 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6205 CGM.IntTy, /*isSigned=*/true); 6206 llvm::Value *Args[] = {IdentTLoc, GTid, 6207 llvm::ConstantInt::get(CGM.IntTy, 6208 IsWorksharingReduction ? 1 : 0, 6209 /*isSigned=*/true)}; 6210 (void)CGF.EmitRuntimeCall( 6211 OMPBuilder.getOrCreateRuntimeFunction( 6212 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6213 Args); 6214 } 6215 6216 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6217 SourceLocation Loc, 6218 ReductionCodeGen &RCG, 6219 unsigned N) { 6220 auto Sizes = RCG.getSizes(N); 6221 // Emit threadprivate global variable if the type is non-constant 6222 // (Sizes.second = nullptr). 
6223 if (Sizes.second) { 6224 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6225 /*isSigned=*/false); 6226 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6227 CGF, CGM.getContext().getSizeType(), 6228 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6229 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6230 } 6231 } 6232 6233 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6234 SourceLocation Loc, 6235 llvm::Value *ReductionsPtr, 6236 LValue SharedLVal) { 6237 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6238 // *d); 6239 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6240 CGM.IntTy, 6241 /*isSigned=*/true), 6242 ReductionsPtr, 6243 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6244 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6245 return Address::deprecated( 6246 CGF.EmitRuntimeCall( 6247 OMPBuilder.getOrCreateRuntimeFunction( 6248 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6249 Args), 6250 SharedLVal.getAlignment()); 6251 } 6252 6253 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6254 const OMPTaskDataTy &Data) { 6255 if (!CGF.HaveInsertPoint()) 6256 return; 6257 6258 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6259 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
6260 OMPBuilder.createTaskwait(CGF.Builder); 6261 } else { 6262 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6263 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6264 auto &M = CGM.getModule(); 6265 Address DependenciesArray = Address::invalid(); 6266 llvm::Value *NumOfElements; 6267 std::tie(NumOfElements, DependenciesArray) = 6268 emitDependClause(CGF, Data.Dependences, Loc); 6269 llvm::Value *DepWaitTaskArgs[6]; 6270 if (!Data.Dependences.empty()) { 6271 DepWaitTaskArgs[0] = UpLoc; 6272 DepWaitTaskArgs[1] = ThreadID; 6273 DepWaitTaskArgs[2] = NumOfElements; 6274 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6275 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6276 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6277 6278 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6279 6280 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6281 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6282 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6283 // is specified. 6284 CGF.EmitRuntimeCall( 6285 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6286 DepWaitTaskArgs); 6287 6288 } else { 6289 6290 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6291 // global_tid); 6292 llvm::Value *Args[] = {UpLoc, ThreadID}; 6293 // Ignore return result until untied tasks are supported. 
6294 CGF.EmitRuntimeCall( 6295 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6296 Args); 6297 } 6298 } 6299 6300 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6301 Region->emitUntiedSwitch(CGF); 6302 } 6303 6304 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6305 OpenMPDirectiveKind InnerKind, 6306 const RegionCodeGenTy &CodeGen, 6307 bool HasCancel) { 6308 if (!CGF.HaveInsertPoint()) 6309 return; 6310 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6311 InnerKind != OMPD_critical && 6312 InnerKind != OMPD_master && 6313 InnerKind != OMPD_masked); 6314 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6315 } 6316 6317 namespace { 6318 enum RTCancelKind { 6319 CancelNoreq = 0, 6320 CancelParallel = 1, 6321 CancelLoop = 2, 6322 CancelSections = 3, 6323 CancelTaskgroup = 4 6324 }; 6325 } // anonymous namespace 6326 6327 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6328 RTCancelKind CancelKind = CancelNoreq; 6329 if (CancelRegion == OMPD_parallel) 6330 CancelKind = CancelParallel; 6331 else if (CancelRegion == OMPD_for) 6332 CancelKind = CancelLoop; 6333 else if (CancelRegion == OMPD_sections) 6334 CancelKind = CancelSections; 6335 else { 6336 assert(CancelRegion == OMPD_taskgroup); 6337 CancelKind = CancelTaskgroup; 6338 } 6339 return CancelKind; 6340 } 6341 6342 void CGOpenMPRuntime::emitCancellationPointCall( 6343 CodeGenFunction &CGF, SourceLocation Loc, 6344 OpenMPDirectiveKind CancelRegion) { 6345 if (!CGF.HaveInsertPoint()) 6346 return; 6347 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6348 // global_tid, kmp_int32 cncl_kind); 6349 if (auto *OMPRegionInfo = 6350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6351 // For 'cancellation point taskgroup', the task region info may not have a 6352 // cancel. This may instead happen in another adjacent task. 
6353 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6354 llvm::Value *Args[] = { 6355 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6356 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6357 // Ignore return result until untied tasks are supported. 6358 llvm::Value *Result = CGF.EmitRuntimeCall( 6359 OMPBuilder.getOrCreateRuntimeFunction( 6360 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6361 Args); 6362 // if (__kmpc_cancellationpoint()) { 6363 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6364 // exit from construct; 6365 // } 6366 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6367 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6368 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6369 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6370 CGF.EmitBlock(ExitBB); 6371 if (CancelRegion == OMPD_parallel) 6372 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6373 // exit from construct; 6374 CodeGenFunction::JumpDest CancelDest = 6375 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6376 CGF.EmitBranchThroughCleanup(CancelDest); 6377 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6378 } 6379 } 6380 } 6381 6382 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6383 const Expr *IfCond, 6384 OpenMPDirectiveKind CancelRegion) { 6385 if (!CGF.HaveInsertPoint()) 6386 return; 6387 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6388 // kmp_int32 cncl_kind); 6389 auto &M = CGM.getModule(); 6390 if (auto *OMPRegionInfo = 6391 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6392 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6393 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6394 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6395 llvm::Value *Args[] = { 6396 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6397 
CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6398 // Ignore return result until untied tasks are supported. 6399 llvm::Value *Result = CGF.EmitRuntimeCall( 6400 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6401 // if (__kmpc_cancel()) { 6402 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6403 // exit from construct; 6404 // } 6405 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6406 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6407 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6408 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6409 CGF.EmitBlock(ExitBB); 6410 if (CancelRegion == OMPD_parallel) 6411 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6412 // exit from construct; 6413 CodeGenFunction::JumpDest CancelDest = 6414 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6415 CGF.EmitBranchThroughCleanup(CancelDest); 6416 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6417 }; 6418 if (IfCond) { 6419 emitIfClause(CGF, IfCond, ThenGen, 6420 [](CodeGenFunction &, PrePostActionTy &) {}); 6421 } else { 6422 RegionCodeGenTy ThenRCG(ThenGen); 6423 ThenRCG(CGF); 6424 } 6425 } 6426 } 6427 6428 namespace { 6429 /// Cleanup action for uses_allocators support. 
6430 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6431 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6432 6433 public: 6434 OMPUsesAllocatorsActionTy( 6435 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6436 : Allocators(Allocators) {} 6437 void Enter(CodeGenFunction &CGF) override { 6438 if (!CGF.HaveInsertPoint()) 6439 return; 6440 for (const auto &AllocatorData : Allocators) { 6441 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6442 CGF, AllocatorData.first, AllocatorData.second); 6443 } 6444 } 6445 void Exit(CodeGenFunction &CGF) override { 6446 if (!CGF.HaveInsertPoint()) 6447 return; 6448 for (const auto &AllocatorData : Allocators) { 6449 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6450 AllocatorData.first); 6451 } 6452 } 6453 }; 6454 } // namespace 6455 6456 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6457 const OMPExecutableDirective &D, StringRef ParentName, 6458 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6459 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6460 assert(!ParentName.empty() && "Invalid target region parent name!"); 6461 HasEmittedTargetRegion = true; 6462 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6463 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6464 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6465 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6466 if (!D.AllocatorTraits) 6467 continue; 6468 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6469 } 6470 } 6471 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6472 CodeGen.setAction(UsesAllocatorAction); 6473 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6474 IsOffloadEntry, CodeGen); 6475 } 6476 6477 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6478 const Expr *Allocator, 6479 const Expr *AllocatorTraits) { 6480 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6481 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6482 // Use default memspace handle. 6483 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6484 llvm::Value *NumTraits = llvm::ConstantInt::get( 6485 CGF.IntTy, cast<ConstantArrayType>( 6486 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6487 ->getSize() 6488 .getLimitedValue()); 6489 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6490 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6491 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6492 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6493 AllocatorTraitsLVal.getBaseInfo(), 6494 AllocatorTraitsLVal.getTBAAInfo()); 6495 llvm::Value *Traits = 6496 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6497 6498 llvm::Value *AllocatorVal = 6499 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6500 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6501 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6502 // Store to allocator. 
6503 CGF.EmitVarDecl(*cast<VarDecl>( 6504 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6505 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6506 AllocatorVal = 6507 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6508 Allocator->getType(), Allocator->getExprLoc()); 6509 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6510 } 6511 6512 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6513 const Expr *Allocator) { 6514 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6515 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6516 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6517 llvm::Value *AllocatorVal = 6518 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6519 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6520 CGF.getContext().VoidPtrTy, 6521 Allocator->getExprLoc()); 6522 (void)CGF.EmitRuntimeCall( 6523 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6524 OMPRTL___kmpc_destroy_allocator), 6525 {ThreadId, AllocatorVal}); 6526 } 6527 6528 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6529 const OMPExecutableDirective &D, StringRef ParentName, 6530 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6531 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6532 // Create a unique name for the entry function using the source location 6533 // information of the current target region. The name will be something like: 6534 // 6535 // __omp_offloading_DD_FFFF_PP_lBB 6536 // 6537 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6538 // mangled name of the function that encloses the target region and BB is the 6539 // line number of the target region. 
6540 6541 const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || 6542 !CGM.getLangOpts().OpenMPOffloadMandatory; 6543 unsigned DeviceID; 6544 unsigned FileID; 6545 unsigned Line; 6546 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6547 Line); 6548 SmallString<64> EntryFnName; 6549 { 6550 llvm::raw_svector_ostream OS(EntryFnName); 6551 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6552 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6553 } 6554 6555 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6556 6557 CodeGenFunction CGF(CGM, true); 6558 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6559 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6560 6561 if (BuildOutlinedFn) 6562 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6563 6564 // If this target outline function is not an offload entry, we don't need to 6565 // register it. 6566 if (!IsOffloadEntry) 6567 return; 6568 6569 // The target region ID is used by the runtime library to identify the current 6570 // target region, so it only has to be unique and not necessarily point to 6571 // anything. It could be the pointer to the outlined function that implements 6572 // the target region, but we aren't using that so that the compiler doesn't 6573 // need to keep that, and could therefore inline the host function if proven 6574 // worthwhile during optimization. In the other hand, if emitting code for the 6575 // device, the ID has to be the function address so that it can retrieved from 6576 // the offloading entry and launched by the runtime library. We also mark the 6577 // outlined function to have external linkage in case we are emitting code for 6578 // the device, because these functions will be entry points to the device. 
6579 6580 if (CGM.getLangOpts().OpenMPIsDevice) { 6581 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6582 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6583 OutlinedFn->setDSOLocal(false); 6584 if (CGM.getTriple().isAMDGCN()) 6585 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6586 } else { 6587 std::string Name = getName({EntryFnName, "region_id"}); 6588 OutlinedFnID = new llvm::GlobalVariable( 6589 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6590 llvm::GlobalValue::WeakAnyLinkage, 6591 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6592 } 6593 6594 // If we do not allow host fallback we still need a named address to use. 6595 llvm::Constant *TargetRegionEntryAddr = OutlinedFn; 6596 if (!BuildOutlinedFn) { 6597 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && 6598 "Named kernel already exists?"); 6599 TargetRegionEntryAddr = new llvm::GlobalVariable( 6600 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6601 llvm::GlobalValue::InternalLinkage, 6602 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); 6603 } 6604 6605 // Register the information for the entry associated with this target region. 
6606 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6607 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6608 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6609 6610 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6611 int32_t DefaultValTeams = -1; 6612 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6613 if (DefaultValTeams > 0 && OutlinedFn) { 6614 OutlinedFn->addFnAttr("omp_target_num_teams", 6615 std::to_string(DefaultValTeams)); 6616 } 6617 int32_t DefaultValThreads = -1; 6618 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6619 if (DefaultValThreads > 0 && OutlinedFn) { 6620 OutlinedFn->addFnAttr("omp_target_thread_limit", 6621 std::to_string(DefaultValThreads)); 6622 } 6623 6624 if (BuildOutlinedFn) 6625 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6626 } 6627 6628 /// Checks if the expression is constant or does not have non-trivial function 6629 /// calls. 6630 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6631 // We can skip constant expressions. 6632 // We can skip expressions with trivial calls or simple expressions. 6633 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6634 !E->hasNonTrivialCall(Ctx)) && 6635 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6636 } 6637 6638 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6639 const Stmt *Body) { 6640 const Stmt *Child = Body->IgnoreContainers(); 6641 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6642 Child = nullptr; 6643 for (const Stmt *S : C->body()) { 6644 if (const auto *E = dyn_cast<Expr>(S)) { 6645 if (isTrivial(Ctx, E)) 6646 continue; 6647 } 6648 // Some of the statements can be ignored. 6649 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6650 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6651 continue; 6652 // Analyze declarations. 
6653 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6654 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6655 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6656 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6657 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6658 isa<UsingDirectiveDecl>(D) || 6659 isa<OMPDeclareReductionDecl>(D) || 6660 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6661 return true; 6662 const auto *VD = dyn_cast<VarDecl>(D); 6663 if (!VD) 6664 return false; 6665 return VD->hasGlobalStorage() || !VD->isUsed(); 6666 })) 6667 continue; 6668 } 6669 // Found multiple children - cannot get the one child only. 6670 if (Child) 6671 return nullptr; 6672 Child = S; 6673 } 6674 if (Child) 6675 Child = Child->IgnoreContainers(); 6676 } 6677 return Child; 6678 } 6679 6680 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6681 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6682 int32_t &DefaultVal) { 6683 6684 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6685 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6686 "Expected target-based executable directive."); 6687 switch (DirectiveKind) { 6688 case OMPD_target: { 6689 const auto *CS = D.getInnermostCapturedStmt(); 6690 const auto *Body = 6691 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6692 const Stmt *ChildStmt = 6693 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6694 if (const auto *NestedDir = 6695 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6696 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6697 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6698 const Expr *NumTeams = 6699 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6700 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6701 if (auto Constant = 6702 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6703 DefaultVal = Constant->getExtValue(); 6704 return NumTeams; 6705 } 6706 DefaultVal 
= 0; 6707 return nullptr; 6708 } 6709 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6710 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6711 DefaultVal = 1; 6712 return nullptr; 6713 } 6714 DefaultVal = 1; 6715 return nullptr; 6716 } 6717 // A value of -1 is used to check if we need to emit no teams region 6718 DefaultVal = -1; 6719 return nullptr; 6720 } 6721 case OMPD_target_teams: 6722 case OMPD_target_teams_distribute: 6723 case OMPD_target_teams_distribute_simd: 6724 case OMPD_target_teams_distribute_parallel_for: 6725 case OMPD_target_teams_distribute_parallel_for_simd: { 6726 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6727 const Expr *NumTeams = 6728 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6729 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6730 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6731 DefaultVal = Constant->getExtValue(); 6732 return NumTeams; 6733 } 6734 DefaultVal = 0; 6735 return nullptr; 6736 } 6737 case OMPD_target_parallel: 6738 case OMPD_target_parallel_for: 6739 case OMPD_target_parallel_for_simd: 6740 case OMPD_target_simd: 6741 DefaultVal = 1; 6742 return nullptr; 6743 case OMPD_parallel: 6744 case OMPD_for: 6745 case OMPD_parallel_for: 6746 case OMPD_parallel_master: 6747 case OMPD_parallel_sections: 6748 case OMPD_for_simd: 6749 case OMPD_parallel_for_simd: 6750 case OMPD_cancel: 6751 case OMPD_cancellation_point: 6752 case OMPD_ordered: 6753 case OMPD_threadprivate: 6754 case OMPD_allocate: 6755 case OMPD_task: 6756 case OMPD_simd: 6757 case OMPD_tile: 6758 case OMPD_unroll: 6759 case OMPD_sections: 6760 case OMPD_section: 6761 case OMPD_single: 6762 case OMPD_master: 6763 case OMPD_critical: 6764 case OMPD_taskyield: 6765 case OMPD_barrier: 6766 case OMPD_taskwait: 6767 case OMPD_taskgroup: 6768 case OMPD_atomic: 6769 case OMPD_flush: 6770 case OMPD_depobj: 6771 case OMPD_scan: 6772 case OMPD_teams: 6773 case OMPD_target_data: 6774 case 
OMPD_target_exit_data: 6775 case OMPD_target_enter_data: 6776 case OMPD_distribute: 6777 case OMPD_distribute_simd: 6778 case OMPD_distribute_parallel_for: 6779 case OMPD_distribute_parallel_for_simd: 6780 case OMPD_teams_distribute: 6781 case OMPD_teams_distribute_simd: 6782 case OMPD_teams_distribute_parallel_for: 6783 case OMPD_teams_distribute_parallel_for_simd: 6784 case OMPD_target_update: 6785 case OMPD_declare_simd: 6786 case OMPD_declare_variant: 6787 case OMPD_begin_declare_variant: 6788 case OMPD_end_declare_variant: 6789 case OMPD_declare_target: 6790 case OMPD_end_declare_target: 6791 case OMPD_declare_reduction: 6792 case OMPD_declare_mapper: 6793 case OMPD_taskloop: 6794 case OMPD_taskloop_simd: 6795 case OMPD_master_taskloop: 6796 case OMPD_master_taskloop_simd: 6797 case OMPD_parallel_master_taskloop: 6798 case OMPD_parallel_master_taskloop_simd: 6799 case OMPD_requires: 6800 case OMPD_metadirective: 6801 case OMPD_unknown: 6802 break; 6803 default: 6804 break; 6805 } 6806 llvm_unreachable("Unexpected directive kind."); 6807 } 6808 6809 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6810 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6811 assert(!CGF.getLangOpts().OpenMPIsDevice && 6812 "Clauses associated with the teams directive expected to be emitted " 6813 "only for the host!"); 6814 CGBuilderTy &Bld = CGF.Builder; 6815 int32_t DefaultNT = -1; 6816 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6817 if (NumTeams != nullptr) { 6818 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6819 6820 switch (DirectiveKind) { 6821 case OMPD_target: { 6822 const auto *CS = D.getInnermostCapturedStmt(); 6823 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6824 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6825 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6826 /*IgnoreResultAssign*/ true); 6827 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6828 /*isSigned=*/true); 6829 } 
6830 case OMPD_target_teams: 6831 case OMPD_target_teams_distribute: 6832 case OMPD_target_teams_distribute_simd: 6833 case OMPD_target_teams_distribute_parallel_for: 6834 case OMPD_target_teams_distribute_parallel_for_simd: { 6835 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6836 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6837 /*IgnoreResultAssign*/ true); 6838 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6839 /*isSigned=*/true); 6840 } 6841 default: 6842 break; 6843 } 6844 } else if (DefaultNT == -1) { 6845 return nullptr; 6846 } 6847 6848 return Bld.getInt32(DefaultNT); 6849 } 6850 6851 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6852 llvm::Value *DefaultThreadLimitVal) { 6853 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6854 CGF.getContext(), CS->getCapturedStmt()); 6855 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6856 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6857 llvm::Value *NumThreads = nullptr; 6858 llvm::Value *CondVal = nullptr; 6859 // Handle if clause. If if clause present, the number of threads is 6860 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
6861 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6862 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6863 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6864 const OMPIfClause *IfClause = nullptr; 6865 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6866 if (C->getNameModifier() == OMPD_unknown || 6867 C->getNameModifier() == OMPD_parallel) { 6868 IfClause = C; 6869 break; 6870 } 6871 } 6872 if (IfClause) { 6873 const Expr *Cond = IfClause->getCondition(); 6874 bool Result; 6875 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6876 if (!Result) 6877 return CGF.Builder.getInt32(1); 6878 } else { 6879 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6880 if (const auto *PreInit = 6881 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6882 for (const auto *I : PreInit->decls()) { 6883 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6884 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6885 } else { 6886 CodeGenFunction::AutoVarEmission Emission = 6887 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6888 CGF.EmitAutoVarCleanups(Emission); 6889 } 6890 } 6891 } 6892 CondVal = CGF.EvaluateExprAsBool(Cond); 6893 } 6894 } 6895 } 6896 // Check the value of num_threads clause iff if clause was not specified 6897 // or is not evaluated to false. 
6898 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6899 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6900 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6901 const auto *NumThreadsClause = 6902 Dir->getSingleClause<OMPNumThreadsClause>(); 6903 CodeGenFunction::LexicalScope Scope( 6904 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6905 if (const auto *PreInit = 6906 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6907 for (const auto *I : PreInit->decls()) { 6908 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6909 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6910 } else { 6911 CodeGenFunction::AutoVarEmission Emission = 6912 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6913 CGF.EmitAutoVarCleanups(Emission); 6914 } 6915 } 6916 } 6917 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6918 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6919 /*isSigned=*/false); 6920 if (DefaultThreadLimitVal) 6921 NumThreads = CGF.Builder.CreateSelect( 6922 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6923 DefaultThreadLimitVal, NumThreads); 6924 } else { 6925 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6926 : CGF.Builder.getInt32(0); 6927 } 6928 // Process condition of the if clause. 6929 if (CondVal) { 6930 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6931 CGF.Builder.getInt32(1)); 6932 } 6933 return NumThreads; 6934 } 6935 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6936 return CGF.Builder.getInt32(1); 6937 return DefaultThreadLimitVal; 6938 } 6939 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6940 : CGF.Builder.getInt32(0); 6941 } 6942 6943 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6944 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6945 int32_t &DefaultVal) { 6946 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6947 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6948 "Expected target-based executable directive."); 6949 6950 switch (DirectiveKind) { 6951 case OMPD_target: 6952 // Teams have no clause thread_limit 6953 return nullptr; 6954 case OMPD_target_teams: 6955 case OMPD_target_teams_distribute: 6956 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6957 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6958 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6959 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6960 if (auto Constant = 6961 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6962 DefaultVal = Constant->getExtValue(); 6963 return ThreadLimit; 6964 } 6965 return nullptr; 6966 case OMPD_target_parallel: 6967 case OMPD_target_parallel_for: 6968 case OMPD_target_parallel_for_simd: 6969 case OMPD_target_teams_distribute_parallel_for: 6970 case OMPD_target_teams_distribute_parallel_for_simd: { 6971 Expr *ThreadLimit = nullptr; 6972 Expr *NumThreads = nullptr; 6973 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6974 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6975 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6976 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6977 if (auto Constant = 6978 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6979 DefaultVal = Constant->getExtValue(); 6980 } 6981 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6982 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6983 NumThreads = NumThreadsClause->getNumThreads(); 6984 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6985 if (auto Constant = 6986 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6987 if (Constant->getExtValue() < DefaultVal) { 6988 DefaultVal = Constant->getExtValue(); 6989 ThreadLimit = NumThreads; 6990 } 6991 } 6992 } 6993 } 6994 return ThreadLimit; 6995 } 6996 case OMPD_target_teams_distribute_simd: 6997 case OMPD_target_simd: 6998 DefaultVal = 1; 6999 return nullptr; 7000 case OMPD_parallel: 7001 case OMPD_for: 7002 case OMPD_parallel_for: 7003 case OMPD_parallel_master: 7004 case OMPD_parallel_sections: 7005 case OMPD_for_simd: 7006 case OMPD_parallel_for_simd: 7007 case OMPD_cancel: 7008 case OMPD_cancellation_point: 7009 case OMPD_ordered: 7010 case OMPD_threadprivate: 7011 case OMPD_allocate: 7012 case OMPD_task: 7013 case OMPD_simd: 7014 case OMPD_tile: 7015 case OMPD_unroll: 7016 case OMPD_sections: 7017 case OMPD_section: 7018 case OMPD_single: 7019 case OMPD_master: 7020 case OMPD_critical: 7021 case OMPD_taskyield: 7022 case OMPD_barrier: 7023 case OMPD_taskwait: 7024 case OMPD_taskgroup: 7025 case OMPD_atomic: 7026 case OMPD_flush: 7027 case OMPD_depobj: 7028 case OMPD_scan: 7029 case OMPD_teams: 7030 case OMPD_target_data: 7031 case OMPD_target_exit_data: 7032 case OMPD_target_enter_data: 7033 case OMPD_distribute: 7034 case OMPD_distribute_simd: 7035 case OMPD_distribute_parallel_for: 7036 case OMPD_distribute_parallel_for_simd: 7037 case OMPD_teams_distribute: 7038 case OMPD_teams_distribute_simd: 7039 case OMPD_teams_distribute_parallel_for: 7040 case OMPD_teams_distribute_parallel_for_simd: 7041 case OMPD_target_update: 7042 case OMPD_declare_simd: 7043 case OMPD_declare_variant: 7044 case OMPD_begin_declare_variant: 7045 case OMPD_end_declare_variant: 7046 case OMPD_declare_target: 7047 case OMPD_end_declare_target: 7048 case OMPD_declare_reduction: 7049 case OMPD_declare_mapper: 7050 case OMPD_taskloop: 7051 case OMPD_taskloop_simd: 7052 case OMPD_master_taskloop: 7053 case OMPD_master_taskloop_simd: 7054 case OMPD_parallel_master_taskloop: 7055 case 
OMPD_parallel_master_taskloop_simd: 7056 case OMPD_requires: 7057 case OMPD_unknown: 7058 break; 7059 default: 7060 break; 7061 } 7062 llvm_unreachable("Unsupported directive kind."); 7063 } 7064 7065 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7066 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7067 assert(!CGF.getLangOpts().OpenMPIsDevice && 7068 "Clauses associated with the teams directive expected to be emitted " 7069 "only for the host!"); 7070 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7071 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7072 "Expected target-based executable directive."); 7073 CGBuilderTy &Bld = CGF.Builder; 7074 llvm::Value *ThreadLimitVal = nullptr; 7075 llvm::Value *NumThreadsVal = nullptr; 7076 switch (DirectiveKind) { 7077 case OMPD_target: { 7078 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7079 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7080 return NumThreads; 7081 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7082 CGF.getContext(), CS->getCapturedStmt()); 7083 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7084 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7085 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7086 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7087 const auto *ThreadLimitClause = 7088 Dir->getSingleClause<OMPThreadLimitClause>(); 7089 CodeGenFunction::LexicalScope Scope( 7090 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7091 if (const auto *PreInit = 7092 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7093 for (const auto *I : PreInit->decls()) { 7094 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7095 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7096 } else { 7097 CodeGenFunction::AutoVarEmission Emission = 7098 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7099 CGF.EmitAutoVarCleanups(Emission); 7100 } 7101 } 7102 } 7103 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7104 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7105 ThreadLimitVal = 7106 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7107 } 7108 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7109 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7110 CS = Dir->getInnermostCapturedStmt(); 7111 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7112 CGF.getContext(), CS->getCapturedStmt()); 7113 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7114 } 7115 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7116 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7117 CS = Dir->getInnermostCapturedStmt(); 7118 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7119 return NumThreads; 7120 } 7121 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7122 return Bld.getInt32(1); 7123 } 7124 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7125 } 7126 case OMPD_target_teams: { 7127 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7128 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7129 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7130 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7131 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7132 ThreadLimitVal = 7133 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7134 } 7135 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7136 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7137 return NumThreads; 7138 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7139 CGF.getContext(), CS->getCapturedStmt()); 7140 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7141 if (Dir->getDirectiveKind() == OMPD_distribute) { 7142 CS = Dir->getInnermostCapturedStmt(); 7143 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7144 return NumThreads; 7145 } 7146 } 7147 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7148 } 7149 case OMPD_target_teams_distribute: 7150 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7151 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7152 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7153 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7154 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7155 ThreadLimitVal = 7156 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7157 } 7158 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7159 case OMPD_target_parallel: 7160 case OMPD_target_parallel_for: 7161 case OMPD_target_parallel_for_simd: 7162 case OMPD_target_teams_distribute_parallel_for: 7163 case OMPD_target_teams_distribute_parallel_for_simd: { 7164 llvm::Value *CondVal = nullptr; 7165 // Handle if clause. If if clause present, the number of threads is 7166 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7167 if (D.hasClausesOfKind<OMPIfClause>()) { 7168 const OMPIfClause *IfClause = nullptr; 7169 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7170 if (C->getNameModifier() == OMPD_unknown || 7171 C->getNameModifier() == OMPD_parallel) { 7172 IfClause = C; 7173 break; 7174 } 7175 } 7176 if (IfClause) { 7177 const Expr *Cond = IfClause->getCondition(); 7178 bool Result; 7179 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7180 if (!Result) 7181 return Bld.getInt32(1); 7182 } else { 7183 CodeGenFunction::RunCleanupsScope Scope(CGF); 7184 CondVal = CGF.EvaluateExprAsBool(Cond); 7185 } 7186 } 7187 } 7188 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7189 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7190 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7191 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7192 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7193 ThreadLimitVal = 7194 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7195 } 7196 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7197 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7198 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7199 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7200 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7201 NumThreadsVal = 7202 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7203 ThreadLimitVal = ThreadLimitVal 7204 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7205 ThreadLimitVal), 7206 NumThreadsVal, ThreadLimitVal) 7207 : NumThreadsVal; 7208 } 7209 if (!ThreadLimitVal) 7210 ThreadLimitVal = Bld.getInt32(0); 7211 if (CondVal) 7212 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7213 return ThreadLimitVal; 7214 } 7215 case OMPD_target_teams_distribute_simd: 7216 case OMPD_target_simd: 7217 return Bld.getInt32(1); 7218 case OMPD_parallel: 7219 case OMPD_for: 7220 case OMPD_parallel_for: 7221 case OMPD_parallel_master: 7222 case OMPD_parallel_sections: 7223 case OMPD_for_simd: 7224 case OMPD_parallel_for_simd: 7225 case OMPD_cancel: 7226 case OMPD_cancellation_point: 7227 case OMPD_ordered: 7228 case OMPD_threadprivate: 7229 case OMPD_allocate: 7230 case OMPD_task: 7231 case OMPD_simd: 7232 case OMPD_tile: 7233 case OMPD_unroll: 7234 case OMPD_sections: 7235 case OMPD_section: 7236 case OMPD_single: 7237 case OMPD_master: 7238 case OMPD_critical: 7239 case OMPD_taskyield: 7240 case OMPD_barrier: 7241 case OMPD_taskwait: 7242 case OMPD_taskgroup: 7243 case OMPD_atomic: 7244 case OMPD_flush: 7245 case OMPD_depobj: 7246 case OMPD_scan: 7247 case OMPD_teams: 7248 case OMPD_target_data: 7249 case OMPD_target_exit_data: 7250 case OMPD_target_enter_data: 7251 case OMPD_distribute: 7252 case OMPD_distribute_simd: 7253 case OMPD_distribute_parallel_for: 7254 case OMPD_distribute_parallel_for_simd: 7255 case OMPD_teams_distribute: 7256 case OMPD_teams_distribute_simd: 7257 case OMPD_teams_distribute_parallel_for: 7258 case OMPD_teams_distribute_parallel_for_simd: 7259 case OMPD_target_update: 7260 case OMPD_declare_simd: 7261 case OMPD_declare_variant: 7262 case OMPD_begin_declare_variant: 7263 case OMPD_end_declare_variant: 7264 case OMPD_declare_target: 7265 case OMPD_end_declare_target: 7266 case OMPD_declare_reduction: 7267 case OMPD_declare_mapper: 7268 case OMPD_taskloop: 7269 case OMPD_taskloop_simd: 7270 case OMPD_master_taskloop: 7271 case 
OMPD_master_taskloop_simd: 7272 case OMPD_parallel_master_taskloop: 7273 case OMPD_parallel_master_taskloop_simd: 7274 case OMPD_requires: 7275 case OMPD_metadirective: 7276 case OMPD_unknown: 7277 break; 7278 default: 7279 break; 7280 } 7281 llvm_unreachable("Unsupported directive kind."); 7282 } 7283 7284 namespace { 7285 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7286 7287 // Utility to handle information from clauses associated with a given 7288 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7289 // It provides a convenient interface to obtain the information and generate 7290 // code for that information. 7291 class MappableExprsHandler { 7292 public: 7293 /// Values for bit flags used to specify the mapping type for 7294 /// offloading. 7295 enum OpenMPOffloadMappingFlags : uint64_t { 7296 /// No flags 7297 OMP_MAP_NONE = 0x0, 7298 /// Allocate memory on the device and move data from host to device. 7299 OMP_MAP_TO = 0x01, 7300 /// Allocate memory on the device and move data from device to host. 7301 OMP_MAP_FROM = 0x02, 7302 /// Always perform the requested mapping action on the element, even 7303 /// if it was already mapped before. 7304 OMP_MAP_ALWAYS = 0x04, 7305 /// Delete the element from the device environment, ignoring the 7306 /// current reference count associated with the element. 7307 OMP_MAP_DELETE = 0x08, 7308 /// The element being mapped is a pointer-pointee pair; both the 7309 /// pointer and the pointee should be mapped. 7310 OMP_MAP_PTR_AND_OBJ = 0x10, 7311 /// This flags signals that the base address of an entry should be 7312 /// passed to the target kernel as an argument. 7313 OMP_MAP_TARGET_PARAM = 0x20, 7314 /// Signal that the runtime library has to return the device pointer 7315 /// in the current position for the data being mapped. Used when we have the 7316 /// use_device_ptr or use_device_addr clause. 
7317 OMP_MAP_RETURN_PARAM = 0x40, 7318 /// This flag signals that the reference being passed is a pointer to 7319 /// private data. 7320 OMP_MAP_PRIVATE = 0x80, 7321 /// Pass the element to the device by value. 7322 OMP_MAP_LITERAL = 0x100, 7323 /// Implicit map 7324 OMP_MAP_IMPLICIT = 0x200, 7325 /// Close is a hint to the runtime to allocate memory close to 7326 /// the target device. 7327 OMP_MAP_CLOSE = 0x400, 7328 /// 0x800 is reserved for compatibility with XLC. 7329 /// Produce a runtime error if the data is not already allocated. 7330 OMP_MAP_PRESENT = 0x1000, 7331 // Increment and decrement a separate reference counter so that the data 7332 // cannot be unmapped within the associated region. Thus, this flag is 7333 // intended to be used on 'target' and 'target data' directives because they 7334 // are inherently structured. It is not intended to be used on 'target 7335 // enter data' and 'target exit data' directives because they are inherently 7336 // dynamic. 7337 // This is an OpenMP extension for the sake of OpenACC support. 7338 OMP_MAP_OMPX_HOLD = 0x2000, 7339 /// Signal that the runtime library should use args as an array of 7340 /// descriptor_dim pointers and use args_size as dims. Used when we have 7341 /// non-contiguous list items in target update directive 7342 OMP_MAP_NON_CONTIG = 0x100000000000, 7343 /// The 16 MSBs of the flags indicate whether the entry is member of some 7344 /// struct/class. 7345 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7346 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7347 }; 7348 7349 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7350 static unsigned getFlagMemberOffset() { 7351 unsigned Offset = 0; 7352 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7353 Remain = Remain >> 1) 7354 Offset++; 7355 return Offset; 7356 } 7357 7358 /// Class that holds debugging information for a data mapping to be passed to 7359 /// the runtime library. 
7360 class MappingExprInfo { 7361 /// The variable declaration used for the data mapping. 7362 const ValueDecl *MapDecl = nullptr; 7363 /// The original expression used in the map clause, or null if there is 7364 /// none. 7365 const Expr *MapExpr = nullptr; 7366 7367 public: 7368 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7369 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7370 7371 const ValueDecl *getMapDecl() const { return MapDecl; } 7372 const Expr *getMapExpr() const { return MapExpr; } 7373 }; 7374 7375 /// Class that associates information with a base pointer to be passed to the 7376 /// runtime library. 7377 class BasePointerInfo { 7378 /// The base pointer. 7379 llvm::Value *Ptr = nullptr; 7380 /// The base declaration that refers to this device pointer, or null if 7381 /// there is none. 7382 const ValueDecl *DevPtrDecl = nullptr; 7383 7384 public: 7385 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7386 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7387 llvm::Value *operator*() const { return Ptr; } 7388 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7389 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7390 }; 7391 7392 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7393 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7394 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7395 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7396 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7397 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7398 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7399 7400 /// This structure contains combined information generated for mappable 7401 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7402 /// mappers, and non-contiguous information. 
7403 struct MapCombinedInfoTy { 7404 struct StructNonContiguousInfo { 7405 bool IsNonContiguous = false; 7406 MapDimArrayTy Dims; 7407 MapNonContiguousArrayTy Offsets; 7408 MapNonContiguousArrayTy Counts; 7409 MapNonContiguousArrayTy Strides; 7410 }; 7411 MapExprsArrayTy Exprs; 7412 MapBaseValuesArrayTy BasePointers; 7413 MapValuesArrayTy Pointers; 7414 MapValuesArrayTy Sizes; 7415 MapFlagsArrayTy Types; 7416 MapMappersArrayTy Mappers; 7417 StructNonContiguousInfo NonContigInfo; 7418 7419 /// Append arrays in \a CurInfo. 7420 void append(MapCombinedInfoTy &CurInfo) { 7421 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7422 BasePointers.append(CurInfo.BasePointers.begin(), 7423 CurInfo.BasePointers.end()); 7424 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7425 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7426 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7427 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7428 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7429 CurInfo.NonContigInfo.Dims.end()); 7430 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7431 CurInfo.NonContigInfo.Offsets.end()); 7432 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7433 CurInfo.NonContigInfo.Counts.end()); 7434 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7435 CurInfo.NonContigInfo.Strides.end()); 7436 } 7437 }; 7438 7439 /// Map between a struct and the its lowest & highest elements which have been 7440 /// mapped. 
7441 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7442 /// HE(FieldIndex, Pointer)} 7443 struct StructRangeInfoTy { 7444 MapCombinedInfoTy PreliminaryMapData; 7445 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7446 0, Address::invalid()}; 7447 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7448 0, Address::invalid()}; 7449 Address Base = Address::invalid(); 7450 Address LB = Address::invalid(); 7451 bool IsArraySection = false; 7452 bool HasCompleteRecord = false; 7453 }; 7454 7455 private: 7456 /// Kind that defines how a device pointer has to be returned. 7457 struct MapInfo { 7458 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7459 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7460 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7461 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7462 bool ReturnDevicePointer = false; 7463 bool IsImplicit = false; 7464 const ValueDecl *Mapper = nullptr; 7465 const Expr *VarRef = nullptr; 7466 bool ForDeviceAddr = false; 7467 7468 MapInfo() = default; 7469 MapInfo( 7470 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7471 OpenMPMapClauseKind MapType, 7472 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7473 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7474 bool ReturnDevicePointer, bool IsImplicit, 7475 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7476 bool ForDeviceAddr = false) 7477 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7478 MotionModifiers(MotionModifiers), 7479 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7480 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7481 }; 7482 7483 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7484 /// member and there is no map information about it, then emission of that 7485 /// entry is deferred until the whole struct has been processed. 
7486 struct DeferredDevicePtrEntryTy { 7487 const Expr *IE = nullptr; 7488 const ValueDecl *VD = nullptr; 7489 bool ForDeviceAddr = false; 7490 7491 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7492 bool ForDeviceAddr) 7493 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7494 }; 7495 7496 /// The target directive from where the mappable clauses were extracted. It 7497 /// is either a executable directive or a user-defined mapper directive. 7498 llvm::PointerUnion<const OMPExecutableDirective *, 7499 const OMPDeclareMapperDecl *> 7500 CurDir; 7501 7502 /// Function the directive is being generated for. 7503 CodeGenFunction &CGF; 7504 7505 /// Set of all first private variables in the current directive. 7506 /// bool data is set to true if the variable is implicitly marked as 7507 /// firstprivate, false otherwise. 7508 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7509 7510 /// Map between device pointer declarations and their expression components. 7511 /// The key value for declarations in 'this' is null. 7512 llvm::DenseMap< 7513 const ValueDecl *, 7514 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7515 DevPointersMap; 7516 7517 /// Map between lambda declarations and their map type. 7518 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7519 7520 llvm::Value *getExprTypeSize(const Expr *E) const { 7521 QualType ExprTy = E->getType().getCanonicalType(); 7522 7523 // Calculate the size for array shaping expression. 
7524 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7525 llvm::Value *Size = 7526 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7527 for (const Expr *SE : OAE->getDimensions()) { 7528 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7529 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7530 CGF.getContext().getSizeType(), 7531 SE->getExprLoc()); 7532 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7533 } 7534 return Size; 7535 } 7536 7537 // Reference types are ignored for mapping purposes. 7538 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7539 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7540 7541 // Given that an array section is considered a built-in type, we need to 7542 // do the calculation based on the length of the section instead of relying 7543 // on CGF.getTypeSize(E->getType()). 7544 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7545 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7546 OAE->getBase()->IgnoreParenImpCasts()) 7547 .getCanonicalType(); 7548 7549 // If there is no length associated with the expression and lower bound is 7550 // not specified too, that means we are using the whole length of the 7551 // base. 7552 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7553 !OAE->getLowerBound()) 7554 return CGF.getTypeSize(BaseTy); 7555 7556 llvm::Value *ElemSize; 7557 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7558 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7559 } else { 7560 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7561 assert(ATy && "Expecting array type if not a pointer type."); 7562 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7563 } 7564 7565 // If we don't have a length at this point, that is because we have an 7566 // array section with a single element. 
7567 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7568 return ElemSize; 7569 7570 if (const Expr *LenExpr = OAE->getLength()) { 7571 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7572 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7573 CGF.getContext().getSizeType(), 7574 LenExpr->getExprLoc()); 7575 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7576 } 7577 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7578 OAE->getLowerBound() && "expected array_section[lb:]."); 7579 // Size = sizetype - lb * elemtype; 7580 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7581 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7582 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7583 CGF.getContext().getSizeType(), 7584 OAE->getLowerBound()->getExprLoc()); 7585 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7586 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7587 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7588 LengthVal = CGF.Builder.CreateSelect( 7589 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7590 return LengthVal; 7591 } 7592 return CGF.getTypeSize(ExprTy); 7593 } 7594 7595 /// Return the corresponding bits for a given map clause modifier. Add 7596 /// a flag marking the map as a pointer if requested. Add a flag marking the 7597 /// map as the first one of a series of maps that relate to the same map 7598 /// expression. 7599 OpenMPOffloadMappingFlags getMapTypeBits( 7600 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7601 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7602 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7603 OpenMPOffloadMappingFlags Bits = 7604 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7605 switch (MapType) { 7606 case OMPC_MAP_alloc: 7607 case OMPC_MAP_release: 7608 // alloc and release is the default behavior in the runtime library, i.e. 7609 // if we don't pass any bits alloc/release that is what the runtime is 7610 // going to do. Therefore, we don't need to signal anything for these two 7611 // type modifiers. 7612 break; 7613 case OMPC_MAP_to: 7614 Bits |= OMP_MAP_TO; 7615 break; 7616 case OMPC_MAP_from: 7617 Bits |= OMP_MAP_FROM; 7618 break; 7619 case OMPC_MAP_tofrom: 7620 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7621 break; 7622 case OMPC_MAP_delete: 7623 Bits |= OMP_MAP_DELETE; 7624 break; 7625 case OMPC_MAP_unknown: 7626 llvm_unreachable("Unexpected map type!"); 7627 } 7628 if (AddPtrFlag) 7629 Bits |= OMP_MAP_PTR_AND_OBJ; 7630 if (AddIsTargetParamFlag) 7631 Bits |= OMP_MAP_TARGET_PARAM; 7632 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7633 Bits |= OMP_MAP_ALWAYS; 7634 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7635 Bits |= OMP_MAP_CLOSE; 7636 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7637 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7638 Bits |= OMP_MAP_PRESENT; 7639 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7640 Bits |= OMP_MAP_OMPX_HOLD; 7641 if (IsNonContiguous) 7642 Bits |= OMP_MAP_NON_CONTIG; 7643 return Bits; 7644 } 7645 7646 /// Return true if the provided expression is a final array section. A 7647 /// final array section, is one whose length can't be proved to be one. 7648 bool isFinalArraySectionExpression(const Expr *E) const { 7649 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7650 7651 // It is not an array section and therefore not a unity-size one. 7652 if (!OASE) 7653 return false; 7654 7655 // An array section with no colon always refer to a single element. 
7656 if (OASE->getColonLocFirst().isInvalid()) 7657 return false; 7658 7659 const Expr *Length = OASE->getLength(); 7660 7661 // If we don't have a length we have to check if the array has size 1 7662 // for this dimension. Also, we should always expect a length if the 7663 // base type is pointer. 7664 if (!Length) { 7665 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7666 OASE->getBase()->IgnoreParenImpCasts()) 7667 .getCanonicalType(); 7668 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7669 return ATy->getSize().getSExtValue() != 1; 7670 // If we don't have a constant dimension length, we have to consider 7671 // the current section as having any size, so it is not necessarily 7672 // unitary. If it happen to be unity size, that's user fault. 7673 return true; 7674 } 7675 7676 // Check if the length evaluates to 1. 7677 Expr::EvalResult Result; 7678 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7679 return true; // Can have more that size 1. 7680 7681 llvm::APSInt ConstLength = Result.Val.getInt(); 7682 return ConstLength.getSExtValue() != 1; 7683 } 7684 7685 /// Generate the base pointers, section pointers, sizes, map type bits, and 7686 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7687 /// map type, map or motion modifiers, and expression components. 7688 /// \a IsFirstComponent should be set to true if the provided set of 7689 /// components is the first associated with a capture. 
7690 void generateInfoForComponentList( 7691 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7692 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7693 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7694 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7695 bool IsFirstComponentList, bool IsImplicit, 7696 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7697 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7698 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7699 OverlappedElements = llvm::None) const { 7700 // The following summarizes what has to be generated for each map and the 7701 // types below. The generated information is expressed in this order: 7702 // base pointer, section pointer, size, flags 7703 // (to add to the ones that come from the map type and modifier). 7704 // 7705 // double d; 7706 // int i[100]; 7707 // float *p; 7708 // 7709 // struct S1 { 7710 // int i; 7711 // float f[50]; 7712 // } 7713 // struct S2 { 7714 // int i; 7715 // float f[50]; 7716 // S1 s; 7717 // double *p; 7718 // struct S2 *ps; 7719 // int &ref; 7720 // } 7721 // S2 s; 7722 // S2 *ps; 7723 // 7724 // map(d) 7725 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7726 // 7727 // map(i) 7728 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7729 // 7730 // map(i[1:23]) 7731 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7732 // 7733 // map(p) 7734 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7735 // 7736 // map(p[1:24]) 7737 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7738 // in unified shared memory mode or for local pointers 7739 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7740 // 7741 // map(s) 7742 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7743 // 7744 // map(s.i) 7745 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7746 // 7747 // map(s.s.f) 7748 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7749 // 7750 // map(s.p) 7751 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7752 // 7753 // map(to: s.p[:22]) 7754 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7755 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7756 // &(s.p), &(s.p[0]), 22*sizeof(double), 7757 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7758 // (*) alloc space for struct members, only this is a target parameter 7759 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7760 // optimizes this entry out, same in the examples below) 7761 // (***) map the pointee (map: to) 7762 // 7763 // map(to: s.ref) 7764 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7765 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7766 // (*) alloc space for struct members, only this is a target parameter 7767 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7768 // optimizes this entry out, same in the examples below) 7769 // (***) map the pointee (map: to) 7770 // 7771 // map(s.ps) 7772 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7773 // 7774 // map(from: s.ps->s.i) 7775 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7776 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7777 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7778 // 7779 // map(to: s.ps->ps) 7780 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7781 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7782 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7783 // 7784 // map(s.ps->ps->ps) 7785 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7786 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7787 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7788 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7789 // 7790 // map(to: s.ps->ps->s.f[:22]) 7791 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7792 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7793 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7794 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7795 // 7796 // map(ps) 7797 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7798 // 7799 // map(ps->i) 7800 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7801 // 7802 // map(ps->s.f) 7803 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7804 // 7805 // map(from: ps->p) 7806 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7807 // 7808 // map(to: ps->p[:22]) 7809 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7810 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7811 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7812 // 7813 // map(ps->ps) 7814 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7815 // 7816 // map(from: ps->ps->s.i) 7817 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7818 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7819 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7820 // 7821 // map(from: ps->ps->ps) 7822 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7823 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7824 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7825 // 7826 // map(ps->ps->ps->ps) 7827 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7828 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7829 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7830 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7831 // 7832 // map(to: ps->ps->ps->s.f[:22]) 7833 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7834 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7835 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7836 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7837 // 7838 // map(to: s.f[:22]) map(from: s.p[:33]) 7839 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7840 // sizeof(double*) (**), TARGET_PARAM 7841 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7842 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7843 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7844 // (*) allocate contiguous space needed to fit all mapped members even if 7845 // we allocate space for members not mapped (in this example, 7846 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7847 // them as well because they fall between &s.f[0] and &s.p) 7848 // 7849 // map(from: s.f[:22]) map(to: ps->p[:33]) 7850 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7851 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7852 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7853 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7854 // (*) the struct this entry pertains to is the 2nd element in the list of 7855 // arguments, hence MEMBER_OF(2) 7856 // 7857 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7858 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7859 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7860 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7861 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7862 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7863 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7864 // (*) the struct this entry pertains to is the 4th element in the list 7865 // of arguments, hence MEMBER_OF(4) 7866 7867 // Track if the map information being generated is the first for a capture. 7868 bool IsCaptureFirstInfo = IsFirstComponentList; 7869 // When the variable is on a declare target link or in a to clause with 7870 // unified memory, a reference is needed to hold the host/device address 7871 // of the variable. 7872 bool RequiresReference = false; 7873 7874 // Scan the components from the base to the complete expression. 
7875 auto CI = Components.rbegin(); 7876 auto CE = Components.rend(); 7877 auto I = CI; 7878 7879 // Track if the map information being generated is the first for a list of 7880 // components. 7881 bool IsExpressionFirstInfo = true; 7882 bool FirstPointerInComplexData = false; 7883 Address BP = Address::invalid(); 7884 const Expr *AssocExpr = I->getAssociatedExpression(); 7885 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7886 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7887 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7888 7889 if (isa<MemberExpr>(AssocExpr)) { 7890 // The base is the 'this' pointer. The content of the pointer is going 7891 // to be the base of the field being mapped. 7892 BP = CGF.LoadCXXThisAddress(); 7893 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7894 (OASE && 7895 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7896 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7897 } else if (OAShE && 7898 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7899 BP = Address::deprecated( 7900 CGF.EmitScalarExpr(OAShE->getBase()), 7901 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7902 } else { 7903 // The base is the reference to the variable. 7904 // BP = &Var. 7905 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7906 if (const auto *VD = 7907 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7908 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7909 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7910 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7911 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7912 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7913 RequiresReference = true; 7914 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7915 } 7916 } 7917 } 7918 7919 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7920 // the last component), the base has to be the pointer itself, not its 7921 // reference. References are ignored for mapping purposes. 7922 QualType Ty = 7923 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7924 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7925 // No need to generate individual map information for the pointer, it 7926 // can be associated with the combined storage if shared memory mode is 7927 // active or the base declaration is not global variable. 7928 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7929 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7930 !VD || VD->hasLocalStorage()) 7931 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7932 else 7933 FirstPointerInComplexData = true; 7934 ++I; 7935 } 7936 } 7937 7938 // Track whether a component of the list should be marked as MEMBER_OF some 7939 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7940 // in a component list should be marked as MEMBER_OF, all subsequent entries 7941 // do not belong to the base struct. E.g. 7942 // struct S2 s; 7943 // s.ps->ps->ps->f[:] 7944 // (1) (2) (3) (4) 7945 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7946 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7947 // is the pointee of ps(2) which is not member of struct s, so it should not 7948 // be marked as such (it is still PTR_AND_OBJ). 7949 // The variable is initialized to false so that PTR_AND_OBJ entries which 7950 // are not struct members are not considered (e.g. array of pointers to 7951 // data). 7952 bool ShouldBeMemberOf = false; 7953 7954 // Variable keeping track of whether or not we have encountered a component 7955 // in the component list which is a member expression. 
Useful when we have a 7956 // pointer or a final array section, in which case it is the previous 7957 // component in the list which tells us whether we have a member expression. 7958 // E.g. X.f[:] 7959 // While processing the final array section "[:]" it is "f" which tells us 7960 // whether we are dealing with a member of a declared struct. 7961 const MemberExpr *EncounteredME = nullptr; 7962 7963 // Track for the total number of dimension. Start from one for the dummy 7964 // dimension. 7965 uint64_t DimSize = 1; 7966 7967 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7968 bool IsPrevMemberReference = false; 7969 7970 for (; I != CE; ++I) { 7971 // If the current component is member of a struct (parent struct) mark it. 7972 if (!EncounteredME) { 7973 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7974 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7975 // as MEMBER_OF the parent struct. 7976 if (EncounteredME) { 7977 ShouldBeMemberOf = true; 7978 // Do not emit as complex pointer if this is actually not array-like 7979 // expression. 7980 if (FirstPointerInComplexData) { 7981 QualType Ty = std::prev(I) 7982 ->getAssociatedDeclaration() 7983 ->getType() 7984 .getNonReferenceType(); 7985 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7986 FirstPointerInComplexData = false; 7987 } 7988 } 7989 } 7990 7991 auto Next = std::next(I); 7992 7993 // We need to generate the addresses and sizes if this is the last 7994 // component, if the component is a pointer or if it is an array section 7995 // whose length can't be proved to be one. If this is a pointer, it 7996 // becomes the base address for the following components. 7997 7998 // A final array section, is one whose length can't be proved to be one. 7999 // If the map item is non-contiguous then we don't treat any array section 8000 // as final array section. 
8001 bool IsFinalArraySection = 8002 !IsNonContiguous && 8003 isFinalArraySectionExpression(I->getAssociatedExpression()); 8004 8005 // If we have a declaration for the mapping use that, otherwise use 8006 // the base declaration of the map clause. 8007 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 8008 ? I->getAssociatedDeclaration() 8009 : BaseDecl; 8010 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 8011 : MapExpr; 8012 8013 // Get information on whether the element is a pointer. Have to do a 8014 // special treatment for array sections given that they are built-in 8015 // types. 8016 const auto *OASE = 8017 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 8018 const auto *OAShE = 8019 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 8020 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8021 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8022 bool IsPointer = 8023 OAShE || 8024 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8025 .getCanonicalType() 8026 ->isAnyPointerType()) || 8027 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8028 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8029 MapDecl && 8030 MapDecl->getType()->isLValueReferenceType(); 8031 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8032 8033 if (OASE) 8034 ++DimSize; 8035 8036 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8037 IsFinalArraySection) { 8038 // If this is not the last component, we expect the pointer to be 8039 // associated with an array expression or member expression. 
8040 assert((Next == CE || 8041 isa<MemberExpr>(Next->getAssociatedExpression()) || 8042 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8043 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8044 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8045 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8046 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8047 "Unexpected expression"); 8048 8049 Address LB = Address::invalid(); 8050 Address LowestElem = Address::invalid(); 8051 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8052 const MemberExpr *E) { 8053 const Expr *BaseExpr = E->getBase(); 8054 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8055 // scalar. 8056 LValue BaseLV; 8057 if (E->isArrow()) { 8058 LValueBaseInfo BaseInfo; 8059 TBAAAccessInfo TBAAInfo; 8060 Address Addr = 8061 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8062 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8063 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8064 } else { 8065 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8066 } 8067 return BaseLV; 8068 }; 8069 if (OAShE) { 8070 LowestElem = LB = 8071 Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()), 8072 CGF.getContext().getTypeAlignInChars( 8073 OAShE->getBase()->getType())); 8074 } else if (IsMemberReference) { 8075 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8076 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8077 LowestElem = CGF.EmitLValueForFieldInitialization( 8078 BaseLVal, cast<FieldDecl>(MapDecl)) 8079 .getAddress(CGF); 8080 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8081 .getAddress(CGF); 8082 } else { 8083 LowestElem = LB = 8084 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8085 .getAddress(CGF); 8086 } 8087 8088 // If this component is a pointer inside the base struct then we don't 8089 // need to create any entry for it - it will be combined with the object 
8090 // it is pointing to into a single PTR_AND_OBJ entry. 8091 bool IsMemberPointerOrAddr = 8092 EncounteredME && 8093 (((IsPointer || ForDeviceAddr) && 8094 I->getAssociatedExpression() == EncounteredME) || 8095 (IsPrevMemberReference && !IsPointer) || 8096 (IsMemberReference && Next != CE && 8097 !Next->getAssociatedExpression()->getType()->isPointerType())); 8098 if (!OverlappedElements.empty() && Next == CE) { 8099 // Handle base element with the info for overlapped elements. 8100 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8101 assert(!IsPointer && 8102 "Unexpected base element with the pointer type."); 8103 // Mark the whole struct as the struct that requires allocation on the 8104 // device. 8105 PartialStruct.LowestElem = {0, LowestElem}; 8106 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8107 I->getAssociatedExpression()->getType()); 8108 Address HB = CGF.Builder.CreateConstGEP( 8109 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8110 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8111 TypeSize.getQuantity() - 1); 8112 PartialStruct.HighestElem = { 8113 std::numeric_limits<decltype( 8114 PartialStruct.HighestElem.first)>::max(), 8115 HB}; 8116 PartialStruct.Base = BP; 8117 PartialStruct.LB = LB; 8118 assert( 8119 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8120 "Overlapped elements must be used only once for the variable."); 8121 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8122 // Emit data for non-overlapped data. 8123 OpenMPOffloadMappingFlags Flags = 8124 OMP_MAP_MEMBER_OF | 8125 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8126 /*AddPtrFlag=*/false, 8127 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8128 llvm::Value *Size = nullptr; 8129 // Do bitcopy of all non-overlapped structure elements. 
8130 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8131 Component : OverlappedElements) { 8132 Address ComponentLB = Address::invalid(); 8133 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8134 Component) { 8135 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8136 const auto *FD = dyn_cast<FieldDecl>(VD); 8137 if (FD && FD->getType()->isLValueReferenceType()) { 8138 const auto *ME = 8139 cast<MemberExpr>(MC.getAssociatedExpression()); 8140 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8141 ComponentLB = 8142 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8143 .getAddress(CGF); 8144 } else { 8145 ComponentLB = 8146 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8147 .getAddress(CGF); 8148 } 8149 Size = CGF.Builder.CreatePtrDiff( 8150 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8151 CGF.EmitCastToVoidPtr(LB.getPointer())); 8152 break; 8153 } 8154 } 8155 assert(Size && "Failed to determine structure size"); 8156 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8157 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8158 CombinedInfo.Pointers.push_back(LB.getPointer()); 8159 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8160 Size, CGF.Int64Ty, /*isSigned=*/true)); 8161 CombinedInfo.Types.push_back(Flags); 8162 CombinedInfo.Mappers.push_back(nullptr); 8163 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8164 : 1); 8165 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8166 } 8167 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8168 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8169 CombinedInfo.Pointers.push_back(LB.getPointer()); 8170 Size = CGF.Builder.CreatePtrDiff( 8171 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8172 CGF.EmitCastToVoidPtr(LB.getPointer())); 8173 CombinedInfo.Sizes.push_back( 8174 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8175 CombinedInfo.Types.push_back(Flags); 8176 CombinedInfo.Mappers.push_back(nullptr); 8177 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8178 : 1); 8179 break; 8180 } 8181 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8182 if (!IsMemberPointerOrAddr || 8183 (Next == CE && MapType != OMPC_MAP_unknown)) { 8184 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8185 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8186 CombinedInfo.Pointers.push_back(LB.getPointer()); 8187 CombinedInfo.Sizes.push_back( 8188 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8189 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8190 : 1); 8191 8192 // If Mapper is valid, the last component inherits the mapper. 8193 bool HasMapper = Mapper && Next == CE; 8194 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8195 8196 // We need to add a pointer flag for each map that comes from the 8197 // same expression except for the first one. We also need to signal 8198 // this map is the first one that relates with the current capture 8199 // (there is a set of entries for each capture). 
8200 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8201 MapType, MapModifiers, MotionModifiers, IsImplicit, 8202 !IsExpressionFirstInfo || RequiresReference || 8203 FirstPointerInComplexData || IsMemberReference, 8204 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8205 8206 if (!IsExpressionFirstInfo || IsMemberReference) { 8207 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8208 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8209 if (IsPointer || (IsMemberReference && Next != CE)) 8210 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8211 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8212 8213 if (ShouldBeMemberOf) { 8214 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8215 // should be later updated with the correct value of MEMBER_OF. 8216 Flags |= OMP_MAP_MEMBER_OF; 8217 // From now on, all subsequent PTR_AND_OBJ entries should not be 8218 // marked as MEMBER_OF. 8219 ShouldBeMemberOf = false; 8220 } 8221 } 8222 8223 CombinedInfo.Types.push_back(Flags); 8224 } 8225 8226 // If we have encountered a member expression so far, keep track of the 8227 // mapped member. If the parent is "*this", then the value declaration 8228 // is nullptr. 
8229 if (EncounteredME) { 8230 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8231 unsigned FieldIndex = FD->getFieldIndex(); 8232 8233 // Update info about the lowest and highest elements for this struct 8234 if (!PartialStruct.Base.isValid()) { 8235 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8236 if (IsFinalArraySection) { 8237 Address HB = 8238 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8239 .getAddress(CGF); 8240 PartialStruct.HighestElem = {FieldIndex, HB}; 8241 } else { 8242 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8243 } 8244 PartialStruct.Base = BP; 8245 PartialStruct.LB = BP; 8246 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8247 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8248 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8249 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8250 } 8251 } 8252 8253 // Need to emit combined struct for array sections. 8254 if (IsFinalArraySection || IsNonContiguous) 8255 PartialStruct.IsArraySection = true; 8256 8257 // If we have a final array section, we are done with this expression. 8258 if (IsFinalArraySection) 8259 break; 8260 8261 // The pointer becomes the base for the next element. 8262 if (Next != CE) 8263 BP = IsMemberReference ? LowestElem : LB; 8264 8265 IsExpressionFirstInfo = false; 8266 IsCaptureFirstInfo = false; 8267 FirstPointerInComplexData = false; 8268 IsPrevMemberReference = IsMemberReference; 8269 } else if (FirstPointerInComplexData) { 8270 QualType Ty = Components.rbegin() 8271 ->getAssociatedDeclaration() 8272 ->getType() 8273 .getNonReferenceType(); 8274 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8275 FirstPointerInComplexData = false; 8276 } 8277 } 8278 // If ran into the whole component - allocate the space for the whole 8279 // record. 
8280 if (!EncounteredME) 8281 PartialStruct.HasCompleteRecord = true; 8282 8283 if (!IsNonContiguous) 8284 return; 8285 8286 const ASTContext &Context = CGF.getContext(); 8287 8288 // For supporting stride in array section, we need to initialize the first 8289 // dimension size as 1, first offset as 0, and first count as 1 8290 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8291 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8292 MapValuesArrayTy CurStrides; 8293 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8294 uint64_t ElementTypeSize; 8295 8296 // Collect Size information for each dimension and get the element size as 8297 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8298 // should be [10, 10] and the first stride is 4 btyes. 8299 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8300 Components) { 8301 const Expr *AssocExpr = Component.getAssociatedExpression(); 8302 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8303 8304 if (!OASE) 8305 continue; 8306 8307 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8308 auto *CAT = Context.getAsConstantArrayType(Ty); 8309 auto *VAT = Context.getAsVariableArrayType(Ty); 8310 8311 // We need all the dimension size except for the last dimension. 8312 assert((VAT || CAT || &Component == &*Components.begin()) && 8313 "Should be either ConstantArray or VariableArray if not the " 8314 "first Component"); 8315 8316 // Get element size if CurStrides is empty. 
8317 if (CurStrides.empty()) { 8318 const Type *ElementType = nullptr; 8319 if (CAT) 8320 ElementType = CAT->getElementType().getTypePtr(); 8321 else if (VAT) 8322 ElementType = VAT->getElementType().getTypePtr(); 8323 else 8324 assert(&Component == &*Components.begin() && 8325 "Only expect pointer (non CAT or VAT) when this is the " 8326 "first Component"); 8327 // If ElementType is null, then it means the base is a pointer 8328 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8329 // for next iteration. 8330 if (ElementType) { 8331 // For the case that having pointer as base, we need to remove one 8332 // level of indirection. 8333 if (&Component != &*Components.begin()) 8334 ElementType = ElementType->getPointeeOrArrayElementType(); 8335 ElementTypeSize = 8336 Context.getTypeSizeInChars(ElementType).getQuantity(); 8337 CurStrides.push_back( 8338 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8339 } 8340 } 8341 // Get dimension value except for the last dimension since we don't need 8342 // it. 8343 if (DimSizes.size() < Components.size() - 1) { 8344 if (CAT) 8345 DimSizes.push_back(llvm::ConstantInt::get( 8346 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8347 else if (VAT) 8348 DimSizes.push_back(CGF.Builder.CreateIntCast( 8349 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8350 /*IsSigned=*/false)); 8351 } 8352 } 8353 8354 // Skip the dummy dimension since we have already have its information. 8355 auto *DI = DimSizes.begin() + 1; 8356 // Product of dimension. 8357 llvm::Value *DimProd = 8358 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8359 8360 // Collect info for non-contiguous. Notice that offset, count, and stride 8361 // are only meaningful for array-section, so we insert a null for anything 8362 // other than array-section. 8363 // Also, the size of offset, count, and stride are not the same as 8364 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8365 // count, and stride are the same as the number of non-contiguous 8366 // declaration in target update to/from clause. 8367 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8368 Components) { 8369 const Expr *AssocExpr = Component.getAssociatedExpression(); 8370 8371 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8372 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8373 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8374 /*isSigned=*/false); 8375 CurOffsets.push_back(Offset); 8376 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8377 CurStrides.push_back(CurStrides.back()); 8378 continue; 8379 } 8380 8381 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8382 8383 if (!OASE) 8384 continue; 8385 8386 // Offset 8387 const Expr *OffsetExpr = OASE->getLowerBound(); 8388 llvm::Value *Offset = nullptr; 8389 if (!OffsetExpr) { 8390 // If offset is absent, then we just set it to zero. 8391 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8392 } else { 8393 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8394 CGF.Int64Ty, 8395 /*isSigned=*/false); 8396 } 8397 CurOffsets.push_back(Offset); 8398 8399 // Count 8400 const Expr *CountExpr = OASE->getLength(); 8401 llvm::Value *Count = nullptr; 8402 if (!CountExpr) { 8403 // In Clang, once a high dimension is an array section, we construct all 8404 // the lower dimension as array section, however, for case like 8405 // arr[0:2][2], Clang construct the inner dimension as an array section 8406 // but it actually is not in an array section form according to spec. 8407 if (!OASE->getColonLocFirst().isValid() && 8408 !OASE->getColonLocSecond().isValid()) { 8409 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8410 } else { 8411 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8412 // When the length is absent it defaults to ⌈(size − 8413 // lower-bound)/stride⌉, where size is the size of the array 8414 // dimension. 8415 const Expr *StrideExpr = OASE->getStride(); 8416 llvm::Value *Stride = 8417 StrideExpr 8418 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8419 CGF.Int64Ty, /*isSigned=*/false) 8420 : nullptr; 8421 if (Stride) 8422 Count = CGF.Builder.CreateUDiv( 8423 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8424 else 8425 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8426 } 8427 } else { 8428 Count = CGF.EmitScalarExpr(CountExpr); 8429 } 8430 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8431 CurCounts.push_back(Count); 8432 8433 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8434 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8435 // Offset Count Stride 8436 // D0 0 1 4 (int) <- dummy dimension 8437 // D1 0 2 8 (2 * (1) * 4) 8438 // D2 1 2 20 (1 * (1 * 5) * 4) 8439 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8440 const Expr *StrideExpr = OASE->getStride(); 8441 llvm::Value *Stride = 8442 StrideExpr 8443 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8444 CGF.Int64Ty, /*isSigned=*/false) 8445 : nullptr; 8446 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8447 if (Stride) 8448 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8449 else 8450 CurStrides.push_back(DimProd); 8451 if (DI != DimSizes.end()) 8452 ++DI; 8453 } 8454 8455 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8456 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8457 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8458 } 8459 8460 /// Return the adjusted map modifiers if the declaration a capture refers to 8461 /// appears in a first-private clause. This is expected to be used only with 8462 /// directives that start with 'target'. 
8463 MappableExprsHandler::OpenMPOffloadMappingFlags 8464 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8465 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8466 8467 // A first private variable captured by reference will use only the 8468 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8469 // declaration is known as first-private in this handler. 8470 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8471 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8472 return MappableExprsHandler::OMP_MAP_TO | 8473 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8474 return MappableExprsHandler::OMP_MAP_PRIVATE | 8475 MappableExprsHandler::OMP_MAP_TO; 8476 } 8477 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8478 if (I != LambdasMap.end()) 8479 // for map(to: lambda): using user specified map type. 8480 return getMapTypeBits( 8481 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8482 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8483 /*AddPtrFlag=*/false, 8484 /*AddIsTargetParamFlag=*/false, 8485 /*isNonContiguous=*/false); 8486 return MappableExprsHandler::OMP_MAP_TO | 8487 MappableExprsHandler::OMP_MAP_FROM; 8488 } 8489 8490 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8491 // Rotate by getFlagMemberOffset() bits. 8492 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8493 << getFlagMemberOffset()); 8494 } 8495 8496 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8497 OpenMPOffloadMappingFlags MemberOfFlag) { 8498 // If the entry is PTR_AND_OBJ but has not been marked with the special 8499 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8500 // marked as MEMBER_OF. 
8501 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8502 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8503 return; 8504 8505 // Reset the placeholder value to prepare the flag for the assignment of the 8506 // proper MEMBER_OF value. 8507 Flags &= ~OMP_MAP_MEMBER_OF; 8508 Flags |= MemberOfFlag; 8509 } 8510 8511 void getPlainLayout(const CXXRecordDecl *RD, 8512 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8513 bool AsBase) const { 8514 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8515 8516 llvm::StructType *St = 8517 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8518 8519 unsigned NumElements = St->getNumElements(); 8520 llvm::SmallVector< 8521 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8522 RecordLayout(NumElements); 8523 8524 // Fill bases. 8525 for (const auto &I : RD->bases()) { 8526 if (I.isVirtual()) 8527 continue; 8528 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8529 // Ignore empty bases. 8530 if (Base->isEmpty() || CGF.getContext() 8531 .getASTRecordLayout(Base) 8532 .getNonVirtualSize() 8533 .isZero()) 8534 continue; 8535 8536 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8537 RecordLayout[FieldIndex] = Base; 8538 } 8539 // Fill in virtual bases. 8540 for (const auto &I : RD->vbases()) { 8541 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8542 // Ignore empty bases. 8543 if (Base->isEmpty()) 8544 continue; 8545 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8546 if (RecordLayout[FieldIndex]) 8547 continue; 8548 RecordLayout[FieldIndex] = Base; 8549 } 8550 // Fill in all the fields. 8551 assert(!RD->isUnion() && "Unexpected union."); 8552 for (const auto *Field : RD->fields()) { 8553 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8554 // will fill in later.) 
8555 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8556 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8557 RecordLayout[FieldIndex] = Field; 8558 } 8559 } 8560 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8561 &Data : RecordLayout) { 8562 if (Data.isNull()) 8563 continue; 8564 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8565 getPlainLayout(Base, Layout, /*AsBase=*/true); 8566 else 8567 Layout.push_back(Data.get<const FieldDecl *>()); 8568 } 8569 } 8570 8571 /// Generate all the base pointers, section pointers, sizes, map types, and 8572 /// mappers for the extracted mappable expressions (all included in \a 8573 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8574 /// pair of the relevant declaration and index where it occurs is appended to 8575 /// the device pointers info array. 8576 void generateAllInfoForClauses( 8577 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8578 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8579 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8580 // We have to process the component lists that relate with the same 8581 // declaration in a single chunk so that we can generate the map flags 8582 // correctly. Therefore, we organize all lists in a map. 8583 enum MapKind { Present, Allocs, Other, Total }; 8584 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8585 SmallVector<SmallVector<MapInfo, 8>, 4>> 8586 Info; 8587 8588 // Helper function to fill the information map for the different supported 8589 // clauses. 
8590 auto &&InfoGen = 8591 [&Info, &SkipVarSet]( 8592 const ValueDecl *D, MapKind Kind, 8593 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8594 OpenMPMapClauseKind MapType, 8595 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8596 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8597 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8598 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8599 if (SkipVarSet.contains(D)) 8600 return; 8601 auto It = Info.find(D); 8602 if (It == Info.end()) 8603 It = Info 8604 .insert(std::make_pair( 8605 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8606 .first; 8607 It->second[Kind].emplace_back( 8608 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8609 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8610 }; 8611 8612 for (const auto *Cl : Clauses) { 8613 const auto *C = dyn_cast<OMPMapClause>(Cl); 8614 if (!C) 8615 continue; 8616 MapKind Kind = Other; 8617 if (llvm::is_contained(C->getMapTypeModifiers(), 8618 OMPC_MAP_MODIFIER_present)) 8619 Kind = Present; 8620 else if (C->getMapType() == OMPC_MAP_alloc) 8621 Kind = Allocs; 8622 const auto *EI = C->getVarRefs().begin(); 8623 for (const auto L : C->component_lists()) { 8624 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8625 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8626 C->getMapTypeModifiers(), llvm::None, 8627 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8628 E); 8629 ++EI; 8630 } 8631 } 8632 for (const auto *Cl : Clauses) { 8633 const auto *C = dyn_cast<OMPToClause>(Cl); 8634 if (!C) 8635 continue; 8636 MapKind Kind = Other; 8637 if (llvm::is_contained(C->getMotionModifiers(), 8638 OMPC_MOTION_MODIFIER_present)) 8639 Kind = Present; 8640 const auto *EI = C->getVarRefs().begin(); 8641 for (const auto L : C->component_lists()) { 8642 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8643 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8644 C->isImplicit(), std::get<2>(L), *EI); 8645 ++EI; 8646 } 8647 } 8648 for (const auto *Cl : Clauses) { 8649 const auto *C = dyn_cast<OMPFromClause>(Cl); 8650 if (!C) 8651 continue; 8652 MapKind Kind = Other; 8653 if (llvm::is_contained(C->getMotionModifiers(), 8654 OMPC_MOTION_MODIFIER_present)) 8655 Kind = Present; 8656 const auto *EI = C->getVarRefs().begin(); 8657 for (const auto L : C->component_lists()) { 8658 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8659 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8660 C->isImplicit(), std::get<2>(L), *EI); 8661 ++EI; 8662 } 8663 } 8664 8665 // Look at the use_device_ptr clause information and mark the existing map 8666 // entries as such. If there is no map information for an entry in the 8667 // use_device_ptr list, we create one with map type 'alloc' and zero size 8668 // section. It is the user fault if that was not mapped before. If there is 8669 // no map information and the pointer is a struct member, then we defer the 8670 // emission of that entry until the whole struct has been processed. 
8671 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8672 SmallVector<DeferredDevicePtrEntryTy, 4>> 8673 DeferredInfo; 8674 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8675 8676 for (const auto *Cl : Clauses) { 8677 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8678 if (!C) 8679 continue; 8680 for (const auto L : C->component_lists()) { 8681 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8682 std::get<1>(L); 8683 assert(!Components.empty() && 8684 "Not expecting empty list of components!"); 8685 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8686 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8687 const Expr *IE = Components.back().getAssociatedExpression(); 8688 // If the first component is a member expression, we have to look into 8689 // 'this', which maps to null in the map of map information. Otherwise 8690 // look directly for the information. 8691 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8692 8693 // We potentially have map information for this declaration already. 8694 // Look for the first set of components that refer to it. 8695 if (It != Info.end()) { 8696 bool Found = false; 8697 for (auto &Data : It->second) { 8698 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8699 return MI.Components.back().getAssociatedDeclaration() == VD; 8700 }); 8701 // If we found a map entry, signal that the pointer has to be 8702 // returned and move on to the next declaration. Exclude cases where 8703 // the base pointer is mapped as array subscript, array section or 8704 // array shaping. The base address is passed as a pointer to base in 8705 // this case and cannot be used as a base for use_device_ptr list 8706 // item. 
8707 if (CI != Data.end()) { 8708 auto PrevCI = std::next(CI->Components.rbegin()); 8709 const auto *VarD = dyn_cast<VarDecl>(VD); 8710 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8711 isa<MemberExpr>(IE) || 8712 !VD->getType().getNonReferenceType()->isPointerType() || 8713 PrevCI == CI->Components.rend() || 8714 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8715 VarD->hasLocalStorage()) { 8716 CI->ReturnDevicePointer = true; 8717 Found = true; 8718 break; 8719 } 8720 } 8721 } 8722 if (Found) 8723 continue; 8724 } 8725 8726 // We didn't find any match in our map information - generate a zero 8727 // size array section - if the pointer is a struct member we defer this 8728 // action until the whole struct has been processed. 8729 if (isa<MemberExpr>(IE)) { 8730 // Insert the pointer into Info to be processed by 8731 // generateInfoForComponentList. Because it is a member pointer 8732 // without a pointee, no entry will be generated for it, therefore 8733 // we need to generate one after the whole struct has been processed. 8734 // Nonetheless, generateInfoForComponentList must be called to take 8735 // the pointer into account for the calculation of the range of the 8736 // partial struct. 
8737 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8738 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8739 nullptr); 8740 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8741 } else { 8742 llvm::Value *Ptr = 8743 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8744 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8745 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8746 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8747 UseDevicePtrCombinedInfo.Sizes.push_back( 8748 llvm::Constant::getNullValue(CGF.Int64Ty)); 8749 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8750 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8751 } 8752 } 8753 } 8754 8755 // Look at the use_device_addr clause information and mark the existing map 8756 // entries as such. If there is no map information for an entry in the 8757 // use_device_addr list, we create one with map type 'alloc' and zero size 8758 // section. It is the user fault if that was not mapped before. If there is 8759 // no map information and the pointer is a struct member, then we defer the 8760 // emission of that entry until the whole struct has been processed. 8761 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8762 for (const auto *Cl : Clauses) { 8763 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8764 if (!C) 8765 continue; 8766 for (const auto L : C->component_lists()) { 8767 assert(!std::get<1>(L).empty() && 8768 "Not expecting empty list of components!"); 8769 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8770 if (!Processed.insert(VD).second) 8771 continue; 8772 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8773 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8774 // If the first component is a member expression, we have to look into 8775 // 'this', which maps to null in the map of map information. 
Otherwise 8776 // look directly for the information. 8777 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8778 8779 // We potentially have map information for this declaration already. 8780 // Look for the first set of components that refer to it. 8781 if (It != Info.end()) { 8782 bool Found = false; 8783 for (auto &Data : It->second) { 8784 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8785 return MI.Components.back().getAssociatedDeclaration() == VD; 8786 }); 8787 // If we found a map entry, signal that the pointer has to be 8788 // returned and move on to the next declaration. 8789 if (CI != Data.end()) { 8790 CI->ReturnDevicePointer = true; 8791 Found = true; 8792 break; 8793 } 8794 } 8795 if (Found) 8796 continue; 8797 } 8798 8799 // We didn't find any match in our map information - generate a zero 8800 // size array section - if the pointer is a struct member we defer this 8801 // action until the whole struct has been processed. 8802 if (isa<MemberExpr>(IE)) { 8803 // Insert the pointer into Info to be processed by 8804 // generateInfoForComponentList. Because it is a member pointer 8805 // without a pointee, no entry will be generated for it, therefore 8806 // we need to generate one after the whole struct has been processed. 8807 // Nonetheless, generateInfoForComponentList must be called to take 8808 // the pointer into account for the calculation of the range of the 8809 // partial struct. 
8810 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8811 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8812 nullptr, nullptr, /*ForDeviceAddr=*/true); 8813 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8814 } else { 8815 llvm::Value *Ptr; 8816 if (IE->isGLValue()) 8817 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8818 else 8819 Ptr = CGF.EmitScalarExpr(IE); 8820 CombinedInfo.Exprs.push_back(VD); 8821 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8822 CombinedInfo.Pointers.push_back(Ptr); 8823 CombinedInfo.Sizes.push_back( 8824 llvm::Constant::getNullValue(CGF.Int64Ty)); 8825 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8826 CombinedInfo.Mappers.push_back(nullptr); 8827 } 8828 } 8829 } 8830 8831 for (const auto &Data : Info) { 8832 StructRangeInfoTy PartialStruct; 8833 // Temporary generated information. 8834 MapCombinedInfoTy CurInfo; 8835 const Decl *D = Data.first; 8836 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8837 for (const auto &M : Data.second) { 8838 for (const MapInfo &L : M) { 8839 assert(!L.Components.empty() && 8840 "Not expecting declaration with no component lists."); 8841 8842 // Remember the current base pointer index. 8843 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8844 CurInfo.NonContigInfo.IsNonContiguous = 8845 L.Components.back().isNonContiguous(); 8846 generateInfoForComponentList( 8847 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8848 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8849 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8850 8851 // If this entry relates with a device pointer, set the relevant 8852 // declaration and add the 'return pointer' flag. 
8853 if (L.ReturnDevicePointer) { 8854 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8855 "Unexpected number of mapped base pointers."); 8856 8857 const ValueDecl *RelevantVD = 8858 L.Components.back().getAssociatedDeclaration(); 8859 assert(RelevantVD && 8860 "No relevant declaration related with device pointer??"); 8861 8862 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8863 RelevantVD); 8864 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8865 } 8866 } 8867 } 8868 8869 // Append any pending zero-length pointers which are struct members and 8870 // used with use_device_ptr or use_device_addr. 8871 auto CI = DeferredInfo.find(Data.first); 8872 if (CI != DeferredInfo.end()) { 8873 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8874 llvm::Value *BasePtr; 8875 llvm::Value *Ptr; 8876 if (L.ForDeviceAddr) { 8877 if (L.IE->isGLValue()) 8878 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8879 else 8880 Ptr = this->CGF.EmitScalarExpr(L.IE); 8881 BasePtr = Ptr; 8882 // Entry is RETURN_PARAM. Also, set the placeholder value 8883 // MEMBER_OF=FFFF so that the entry is later updated with the 8884 // correct value of MEMBER_OF. 8885 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8886 } else { 8887 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8888 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8889 L.IE->getExprLoc()); 8890 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8891 // placeholder value MEMBER_OF=FFFF so that the entry is later 8892 // updated with the correct value of MEMBER_OF. 
8893 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8894 OMP_MAP_MEMBER_OF); 8895 } 8896 CurInfo.Exprs.push_back(L.VD); 8897 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8898 CurInfo.Pointers.push_back(Ptr); 8899 CurInfo.Sizes.push_back( 8900 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8901 CurInfo.Mappers.push_back(nullptr); 8902 } 8903 } 8904 // If there is an entry in PartialStruct it means we have a struct with 8905 // individual members mapped. Emit an extra combined entry. 8906 if (PartialStruct.Base.isValid()) { 8907 CurInfo.NonContigInfo.Dims.push_back(0); 8908 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8909 } 8910 8911 // We need to append the results of this capture to what we already 8912 // have. 8913 CombinedInfo.append(CurInfo); 8914 } 8915 // Append data for use_device_ptr clauses. 8916 CombinedInfo.append(UseDevicePtrCombinedInfo); 8917 } 8918 8919 public: 8920 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8921 : CurDir(&Dir), CGF(CGF) { 8922 // Extract firstprivate clause information. 8923 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8924 for (const auto *D : C->varlists()) 8925 FirstPrivateDecls.try_emplace( 8926 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8927 // Extract implicit firstprivates from uses_allocators clauses. 
8928 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8929 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8930 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8931 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8932 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8933 /*Implicit=*/true); 8934 else if (const auto *VD = dyn_cast<VarDecl>( 8935 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8936 ->getDecl())) 8937 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8938 } 8939 } 8940 // Extract device pointer clause information. 8941 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8942 for (auto L : C->component_lists()) 8943 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8944 // Extract map information. 8945 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8946 if (C->getMapType() != OMPC_MAP_to) 8947 continue; 8948 for (auto L : C->component_lists()) { 8949 const ValueDecl *VD = std::get<0>(L); 8950 const auto *RD = VD ? VD->getType() 8951 .getCanonicalType() 8952 .getNonReferenceType() 8953 ->getAsCXXRecordDecl() 8954 : nullptr; 8955 if (RD && RD->isLambda()) 8956 LambdasMap.try_emplace(std::get<0>(L), C); 8957 } 8958 } 8959 } 8960 8961 /// Constructor for the declare mapper directive. 8962 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8963 : CurDir(&Dir), CGF(CGF) {} 8964 8965 /// Generate code for the combined entry if we have a partially mapped struct 8966 /// and take care of the mapping flags of the arguments corresponding to 8967 /// individual struct members. 
8968 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8969 MapFlagsArrayTy &CurTypes, 8970 const StructRangeInfoTy &PartialStruct, 8971 const ValueDecl *VD = nullptr, 8972 bool NotTargetParams = true) const { 8973 if (CurTypes.size() == 1 && 8974 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8975 !PartialStruct.IsArraySection) 8976 return; 8977 Address LBAddr = PartialStruct.LowestElem.second; 8978 Address HBAddr = PartialStruct.HighestElem.second; 8979 if (PartialStruct.HasCompleteRecord) { 8980 LBAddr = PartialStruct.LB; 8981 HBAddr = PartialStruct.LB; 8982 } 8983 CombinedInfo.Exprs.push_back(VD); 8984 // Base is the base of the struct 8985 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8986 // Pointer is the address of the lowest element 8987 llvm::Value *LB = LBAddr.getPointer(); 8988 CombinedInfo.Pointers.push_back(LB); 8989 // There should not be a mapper for a combined entry. 8990 CombinedInfo.Mappers.push_back(nullptr); 8991 // Size is (addr of {highest+1} element) - (addr of lowest element) 8992 llvm::Value *HB = HBAddr.getPointer(); 8993 llvm::Value *HAddr = 8994 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8995 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8996 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8997 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8998 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8999 /*isSigned=*/false); 9000 CombinedInfo.Sizes.push_back(Size); 9001 // Map type is always TARGET_PARAM, if generate info for captures. 9002 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 9003 : OMP_MAP_TARGET_PARAM); 9004 // If any element has the present modifier, then make sure the runtime 9005 // doesn't attempt to allocate the struct. 
9006 if (CurTypes.end() != 9007 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9008 return Type & OMP_MAP_PRESENT; 9009 })) 9010 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 9011 // Remove TARGET_PARAM flag from the first element 9012 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 9013 // If any element has the ompx_hold modifier, then make sure the runtime 9014 // uses the hold reference count for the struct as a whole so that it won't 9015 // be unmapped by an extra dynamic reference count decrement. Add it to all 9016 // elements as well so the runtime knows which reference count to check 9017 // when determining whether it's time for device-to-host transfers of 9018 // individual elements. 9019 if (CurTypes.end() != 9020 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 9021 return Type & OMP_MAP_OMPX_HOLD; 9022 })) { 9023 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 9024 for (auto &M : CurTypes) 9025 M |= OMP_MAP_OMPX_HOLD; 9026 } 9027 9028 // All other current entries will be MEMBER_OF the combined entry 9029 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9030 // 0xFFFF in the MEMBER_OF field). 9031 OpenMPOffloadMappingFlags MemberOfFlag = 9032 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 9033 for (auto &M : CurTypes) 9034 setCorrectMemberOfFlag(M, MemberOfFlag); 9035 } 9036 9037 /// Generate all the base pointers, section pointers, sizes, map types, and 9038 /// mappers for the extracted mappable expressions (all included in \a 9039 /// CombinedInfo). Also, for each item that relates with a device pointer, a 9040 /// pair of the relevant declaration and index where it occurs is appended to 9041 /// the device pointers info array. 
9042 void generateAllInfo( 9043 MapCombinedInfoTy &CombinedInfo, 9044 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9045 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9046 assert(CurDir.is<const OMPExecutableDirective *>() && 9047 "Expect a executable directive"); 9048 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9049 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9050 } 9051 9052 /// Generate all the base pointers, section pointers, sizes, map types, and 9053 /// mappers for the extracted map clauses of user-defined mapper (all included 9054 /// in \a CombinedInfo). 9055 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9056 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9057 "Expect a declare mapper directive"); 9058 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9059 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9060 } 9061 9062 /// Emit capture info for lambdas for variables captured by reference. 
9063 void generateInfoForLambdaCaptures( 9064 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 9065 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 9066 const auto *RD = VD->getType() 9067 .getCanonicalType() 9068 .getNonReferenceType() 9069 ->getAsCXXRecordDecl(); 9070 if (!RD || !RD->isLambda()) 9071 return; 9072 Address VDAddr = 9073 Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD)); 9074 LValue VDLVal = CGF.MakeAddrLValue( 9075 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 9076 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 9077 FieldDecl *ThisCapture = nullptr; 9078 RD->getCaptureFields(Captures, ThisCapture); 9079 if (ThisCapture) { 9080 LValue ThisLVal = 9081 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 9082 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 9083 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 9084 VDLVal.getPointer(CGF)); 9085 CombinedInfo.Exprs.push_back(VD); 9086 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 9087 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 9088 CombinedInfo.Sizes.push_back( 9089 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 9090 CGF.Int64Ty, /*isSigned=*/true)); 9091 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9092 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9093 CombinedInfo.Mappers.push_back(nullptr); 9094 } 9095 for (const LambdaCapture &LC : RD->captures()) { 9096 if (!LC.capturesVariable()) 9097 continue; 9098 const VarDecl *VD = LC.getCapturedVar(); 9099 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 9100 continue; 9101 auto It = Captures.find(VD); 9102 assert(It != Captures.end() && "Found lambda capture without field."); 9103 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 9104 if (LC.getCaptureKind() == LCK_ByRef) { 9105 LValue VarLValVal = 
CGF.EmitLValueForField(VDLVal, It->second); 9106 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9107 VDLVal.getPointer(CGF)); 9108 CombinedInfo.Exprs.push_back(VD); 9109 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 9110 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 9111 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9112 CGF.getTypeSize( 9113 VD->getType().getCanonicalType().getNonReferenceType()), 9114 CGF.Int64Ty, /*isSigned=*/true)); 9115 } else { 9116 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 9117 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9118 VDLVal.getPointer(CGF)); 9119 CombinedInfo.Exprs.push_back(VD); 9120 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 9121 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 9122 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9123 } 9124 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9125 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9126 CombinedInfo.Mappers.push_back(nullptr); 9127 } 9128 } 9129 9130 /// Set correct indices for lambdas captures. 9131 void adjustMemberOfForLambdaCaptures( 9132 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 9133 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 9134 MapFlagsArrayTy &Types) const { 9135 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 9136 // Set correct member_of idx for all implicit lambda captures. 
9137 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9138 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 9139 continue; 9140 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 9141 assert(BasePtr && "Unable to find base lambda address."); 9142 int TgtIdx = -1; 9143 for (unsigned J = I; J > 0; --J) { 9144 unsigned Idx = J - 1; 9145 if (Pointers[Idx] != BasePtr) 9146 continue; 9147 TgtIdx = Idx; 9148 break; 9149 } 9150 assert(TgtIdx != -1 && "Unable to find parent lambda."); 9151 // All other current entries will be MEMBER_OF the combined entry 9152 // (except for PTR_AND_OBJ entries which do not have a placeholder value 9153 // 0xFFFF in the MEMBER_OF field). 9154 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 9155 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 9156 } 9157 } 9158 9159 /// Generate the base pointers, section pointers, sizes, map types, and 9160 /// mappers associated to a given capture (all included in \a CombinedInfo). 9161 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 9162 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 9163 StructRangeInfoTy &PartialStruct) const { 9164 assert(!Cap->capturesVariableArrayType() && 9165 "Not expecting to generate map info for a variable array type!"); 9166 9167 // We need to know when we generating information for the first component 9168 const ValueDecl *VD = Cap->capturesThis() 9169 ? nullptr 9170 : Cap->getCapturedVar()->getCanonicalDecl(); 9171 9172 // for map(to: lambda): skip here, processing it in 9173 // generateDefaultMapInfo 9174 if (LambdasMap.count(VD)) 9175 return; 9176 9177 // If this declaration appears in a is_device_ptr clause we just have to 9178 // pass the pointer by value. If it is a reference to a declaration, we just 9179 // pass its value. 
9180 if (DevPointersMap.count(VD)) { 9181 CombinedInfo.Exprs.push_back(VD); 9182 CombinedInfo.BasePointers.emplace_back(Arg, VD); 9183 CombinedInfo.Pointers.push_back(Arg); 9184 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9185 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 9186 /*isSigned=*/true)); 9187 CombinedInfo.Types.push_back( 9188 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 9189 OMP_MAP_TARGET_PARAM); 9190 CombinedInfo.Mappers.push_back(nullptr); 9191 return; 9192 } 9193 9194 using MapData = 9195 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 9196 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 9197 const ValueDecl *, const Expr *>; 9198 SmallVector<MapData, 4> DeclComponentLists; 9199 assert(CurDir.is<const OMPExecutableDirective *>() && 9200 "Expect a executable directive"); 9201 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9202 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 9203 const auto *EI = C->getVarRefs().begin(); 9204 for (const auto L : C->decl_component_lists(VD)) { 9205 const ValueDecl *VDecl, *Mapper; 9206 // The Expression is not correct if the mapping is implicit 9207 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 9208 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9209 std::tie(VDecl, Components, Mapper) = L; 9210 assert(VDecl == VD && "We got information for the wrong declaration??"); 9211 assert(!Components.empty() && 9212 "Not expecting declaration with no component lists."); 9213 DeclComponentLists.emplace_back(Components, C->getMapType(), 9214 C->getMapTypeModifiers(), 9215 C->isImplicit(), Mapper, E); 9216 ++EI; 9217 } 9218 } 9219 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 9220 const MapData &RHS) { 9221 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 9222 OpenMPMapClauseKind MapType = std::get<1>(RHS); 9223 bool HasPresent = 9224 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9225 bool HasAllocs = MapType == OMPC_MAP_alloc; 9226 MapModifiers = std::get<2>(RHS); 9227 MapType = std::get<1>(LHS); 9228 bool HasPresentR = 9229 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9230 bool HasAllocsR = MapType == OMPC_MAP_alloc; 9231 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 9232 }); 9233 9234 // Find overlapping elements (including the offset from the base element). 
9235 llvm::SmallDenseMap< 9236 const MapData *, 9237 llvm::SmallVector< 9238 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 9239 4> 9240 OverlappedData; 9241 size_t Count = 0; 9242 for (const MapData &L : DeclComponentLists) { 9243 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9244 OpenMPMapClauseKind MapType; 9245 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9246 bool IsImplicit; 9247 const ValueDecl *Mapper; 9248 const Expr *VarRef; 9249 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9250 L; 9251 ++Count; 9252 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 9253 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 9254 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 9255 VarRef) = L1; 9256 auto CI = Components.rbegin(); 9257 auto CE = Components.rend(); 9258 auto SI = Components1.rbegin(); 9259 auto SE = Components1.rend(); 9260 for (; CI != CE && SI != SE; ++CI, ++SI) { 9261 if (CI->getAssociatedExpression()->getStmtClass() != 9262 SI->getAssociatedExpression()->getStmtClass()) 9263 break; 9264 // Are we dealing with different variables/fields? 9265 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 9266 break; 9267 } 9268 // Found overlapping if, at least for one component, reached the head 9269 // of the components list. 9270 if (CI == CE || SI == SE) { 9271 // Ignore it if it is the same component. 9272 if (CI == CE && SI == SE) 9273 continue; 9274 const auto It = (SI == SE) ? CI : SI; 9275 // If one component is a pointer and another one is a kind of 9276 // dereference of this pointer (array subscript, section, dereference, 9277 // etc.), it is not an overlapping. 9278 // Same, if one component is a base and another component is a 9279 // dereferenced pointer memberexpr with the same base. 
9280 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 9281 (std::prev(It)->getAssociatedDeclaration() && 9282 std::prev(It) 9283 ->getAssociatedDeclaration() 9284 ->getType() 9285 ->isPointerType()) || 9286 (It->getAssociatedDeclaration() && 9287 It->getAssociatedDeclaration()->getType()->isPointerType() && 9288 std::next(It) != CE && std::next(It) != SE)) 9289 continue; 9290 const MapData &BaseData = CI == CE ? L : L1; 9291 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 9292 SI == SE ? Components : Components1; 9293 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 9294 OverlappedElements.getSecond().push_back(SubData); 9295 } 9296 } 9297 } 9298 // Sort the overlapped elements for each item. 9299 llvm::SmallVector<const FieldDecl *, 4> Layout; 9300 if (!OverlappedData.empty()) { 9301 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); 9302 const Type *OrigType = BaseType->getPointeeOrArrayElementType(); 9303 while (BaseType != OrigType) { 9304 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); 9305 OrigType = BaseType->getPointeeOrArrayElementType(); 9306 } 9307 9308 if (const auto *CRD = BaseType->getAsCXXRecordDecl()) 9309 getPlainLayout(CRD, Layout, /*AsBase=*/false); 9310 else { 9311 const auto *RD = BaseType->getAsRecordDecl(); 9312 Layout.append(RD->field_begin(), RD->field_end()); 9313 } 9314 } 9315 for (auto &Pair : OverlappedData) { 9316 llvm::stable_sort( 9317 Pair.getSecond(), 9318 [&Layout]( 9319 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 9320 OMPClauseMappableExprCommon::MappableExprComponentListRef 9321 Second) { 9322 auto CI = First.rbegin(); 9323 auto CE = First.rend(); 9324 auto SI = Second.rbegin(); 9325 auto SE = Second.rend(); 9326 for (; CI != CE && SI != SE; ++CI, ++SI) { 9327 if (CI->getAssociatedExpression()->getStmtClass() != 9328 SI->getAssociatedExpression()->getStmtClass()) 9329 break; 9330 // Are we dealing with different 
variables/fields? 9331 if (CI->getAssociatedDeclaration() != 9332 SI->getAssociatedDeclaration()) 9333 break; 9334 } 9335 9336 // Lists contain the same elements. 9337 if (CI == CE && SI == SE) 9338 return false; 9339 9340 // List with less elements is less than list with more elements. 9341 if (CI == CE || SI == SE) 9342 return CI == CE; 9343 9344 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 9345 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 9346 if (FD1->getParent() == FD2->getParent()) 9347 return FD1->getFieldIndex() < FD2->getFieldIndex(); 9348 const auto *It = 9349 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 9350 return FD == FD1 || FD == FD2; 9351 }); 9352 return *It == FD1; 9353 }); 9354 } 9355 9356 // Associated with a capture, because the mapping flags depend on it. 9357 // Go through all of the elements with the overlapped elements. 9358 bool IsFirstComponentList = true; 9359 for (const auto &Pair : OverlappedData) { 9360 const MapData &L = *Pair.getFirst(); 9361 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9362 OpenMPMapClauseKind MapType; 9363 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9364 bool IsImplicit; 9365 const ValueDecl *Mapper; 9366 const Expr *VarRef; 9367 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9368 L; 9369 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9370 OverlappedComponents = Pair.getSecond(); 9371 generateInfoForComponentList( 9372 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9373 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9374 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9375 IsFirstComponentList = false; 9376 } 9377 // Go through other elements without overlapped elements. 
9378 for (const MapData &L : DeclComponentLists) { 9379 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9380 OpenMPMapClauseKind MapType; 9381 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9382 bool IsImplicit; 9383 const ValueDecl *Mapper; 9384 const Expr *VarRef; 9385 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9386 L; 9387 auto It = OverlappedData.find(&L); 9388 if (It == OverlappedData.end()) 9389 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9390 Components, CombinedInfo, PartialStruct, 9391 IsFirstComponentList, IsImplicit, Mapper, 9392 /*ForDeviceAddr=*/false, VD, VarRef); 9393 IsFirstComponentList = false; 9394 } 9395 } 9396 9397 /// Generate the default map information for a given capture \a CI, 9398 /// record field declaration \a RI and captured value \a CV. 9399 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9400 const FieldDecl &RI, llvm::Value *CV, 9401 MapCombinedInfoTy &CombinedInfo) const { 9402 bool IsImplicit = true; 9403 // Do the default mapping. 9404 if (CI.capturesThis()) { 9405 CombinedInfo.Exprs.push_back(nullptr); 9406 CombinedInfo.BasePointers.push_back(CV); 9407 CombinedInfo.Pointers.push_back(CV); 9408 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9409 CombinedInfo.Sizes.push_back( 9410 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9411 CGF.Int64Ty, /*isSigned=*/true)); 9412 // Default map type. 9413 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9414 } else if (CI.capturesVariableByCopy()) { 9415 const VarDecl *VD = CI.getCapturedVar(); 9416 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9417 CombinedInfo.BasePointers.push_back(CV); 9418 CombinedInfo.Pointers.push_back(CV); 9419 if (!RI.getType()->isAnyPointerType()) { 9420 // We have to signal to the runtime captures passed by value that are 9421 // not pointers. 
9422 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9423 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9424 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9425 } else { 9426 // Pointers are implicitly mapped with a zero size and no flags 9427 // (other than first map that is added for all implicit maps). 9428 CombinedInfo.Types.push_back(OMP_MAP_NONE); 9429 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 9430 } 9431 auto I = FirstPrivateDecls.find(VD); 9432 if (I != FirstPrivateDecls.end()) 9433 IsImplicit = I->getSecond(); 9434 } else { 9435 assert(CI.capturesVariable() && "Expected captured reference."); 9436 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 9437 QualType ElementType = PtrTy->getPointeeType(); 9438 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9439 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 9440 // The default map type for a scalar/complex type is 'to' because by 9441 // default the value doesn't have to be retrieved. For an aggregate 9442 // type, the default is 'tofrom'. 9443 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 9444 const VarDecl *VD = CI.getCapturedVar(); 9445 auto I = FirstPrivateDecls.find(VD); 9446 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9447 CombinedInfo.BasePointers.push_back(CV); 9448 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9449 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9450 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9451 AlignmentSource::Decl)); 9452 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 9453 } else { 9454 CombinedInfo.Pointers.push_back(CV); 9455 } 9456 if (I != FirstPrivateDecls.end()) 9457 IsImplicit = I->getSecond(); 9458 } 9459 // Every default map produces a single argument which is a target parameter. 
9460 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 9461 9462 // Add flag stating this is an implicit map. 9463 if (IsImplicit) 9464 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 9465 9466 // No user-defined mapper for default mapping. 9467 CombinedInfo.Mappers.push_back(nullptr); 9468 } 9469 }; 9470 } // anonymous namespace 9471 9472 static void emitNonContiguousDescriptor( 9473 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9474 CGOpenMPRuntime::TargetDataInfo &Info) { 9475 CodeGenModule &CGM = CGF.CGM; 9476 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 9477 &NonContigInfo = CombinedInfo.NonContigInfo; 9478 9479 // Build an array of struct descriptor_dim and then assign it to 9480 // offload_args. 9481 // 9482 // struct descriptor_dim { 9483 // uint64_t offset; 9484 // uint64_t count; 9485 // uint64_t stride 9486 // }; 9487 ASTContext &C = CGF.getContext(); 9488 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9489 RecordDecl *RD; 9490 RD = C.buildImplicitRecord("descriptor_dim"); 9491 RD->startDefinition(); 9492 addFieldToRecordDecl(C, RD, Int64Ty); 9493 addFieldToRecordDecl(C, RD, Int64Ty); 9494 addFieldToRecordDecl(C, RD, Int64Ty); 9495 RD->completeDefinition(); 9496 QualType DimTy = C.getRecordType(RD); 9497 9498 enum { OffsetFD = 0, CountFD, StrideFD }; 9499 // We need two index variable here since the size of "Dims" is the same as the 9500 // size of Components, however, the size of offset, count, and stride is equal 9501 // to the size of base declaration that is non-contiguous. 9502 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9503 // Skip emitting ir if dimension size is 1 since it cannot be 9504 // non-contiguous. 
9505 if (NonContigInfo.Dims[I] == 1) 9506 continue; 9507 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9508 QualType ArrayTy = 9509 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9510 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9511 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9512 unsigned RevIdx = EE - II - 1; 9513 LValue DimsLVal = CGF.MakeAddrLValue( 9514 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9515 // Offset 9516 LValue OffsetLVal = CGF.EmitLValueForField( 9517 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9518 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9519 // Count 9520 LValue CountLVal = CGF.EmitLValueForField( 9521 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9522 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9523 // Stride 9524 LValue StrideLVal = CGF.EmitLValueForField( 9525 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9526 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9527 } 9528 // args[I] = &dims 9529 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9530 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9531 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9532 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9533 Info.PointersArray, 0, I); 9534 Address PAddr = Address::deprecated(P, CGF.getPointerAlign()); 9535 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9536 ++L; 9537 } 9538 } 9539 9540 // Try to extract the base declaration from a `this->x` expression if possible. 
9541 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9542 if (!E) 9543 return nullptr; 9544 9545 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9546 if (const MemberExpr *ME = 9547 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9548 return ME->getMemberDecl(); 9549 return nullptr; 9550 } 9551 9552 /// Emit a string constant containing the names of the values mapped to the 9553 /// offloading runtime library. 9554 llvm::Constant * 9555 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9556 MappableExprsHandler::MappingExprInfo &MapExprs) { 9557 9558 uint32_t SrcLocStrSize; 9559 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9560 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9561 9562 SourceLocation Loc; 9563 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9564 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9565 Loc = VD->getLocation(); 9566 else 9567 Loc = MapExprs.getMapExpr()->getExprLoc(); 9568 } else { 9569 Loc = MapExprs.getMapDecl()->getLocation(); 9570 } 9571 9572 std::string ExprName; 9573 if (MapExprs.getMapExpr()) { 9574 PrintingPolicy P(CGF.getContext().getLangOpts()); 9575 llvm::raw_string_ostream OS(ExprName); 9576 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9577 OS.flush(); 9578 } else { 9579 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9580 } 9581 9582 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9583 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9584 PLoc.getLine(), PLoc.getColumn(), 9585 SrcLocStrSize); 9586 } 9587 9588 /// Emit the arrays used to pass the captures and map information to the 9589 /// offloading runtime library. If there is no map or capture information, 9590 /// return nullptr by reference. 
9591 static void emitOffloadingArrays( 9592 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9593 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9594 bool IsNonContiguous = false) { 9595 CodeGenModule &CGM = CGF.CGM; 9596 ASTContext &Ctx = CGF.getContext(); 9597 9598 // Reset the array information. 9599 Info.clearArrayInfo(); 9600 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9601 9602 if (Info.NumberOfPtrs) { 9603 // Detect if we have any capture size requiring runtime evaluation of the 9604 // size so that a constant array could be eventually used. 9605 9606 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9607 QualType PointerArrayType = Ctx.getConstantArrayType( 9608 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9609 /*IndexTypeQuals=*/0); 9610 9611 Info.BasePointersArray = 9612 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9613 Info.PointersArray = 9614 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9615 Address MappersArray = 9616 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9617 Info.MappersArray = MappersArray.getPointer(); 9618 9619 // If we don't have any VLA types or other types that require runtime 9620 // evaluation, we can use a constant array for the map sizes, otherwise we 9621 // need to fill up the arrays as we do for the pointers. 
9622 QualType Int64Ty = 9623 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9624 SmallVector<llvm::Constant *> ConstSizes( 9625 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9626 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); 9627 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9628 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { 9629 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { 9630 if (IsNonContiguous && (CombinedInfo.Types[I] & 9631 MappableExprsHandler::OMP_MAP_NON_CONTIG)) 9632 ConstSizes[I] = llvm::ConstantInt::get( 9633 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); 9634 else 9635 ConstSizes[I] = CI; 9636 continue; 9637 } 9638 } 9639 RuntimeSizes.set(I); 9640 } 9641 9642 if (RuntimeSizes.all()) { 9643 QualType SizeArrayType = Ctx.getConstantArrayType( 9644 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9645 /*IndexTypeQuals=*/0); 9646 Info.SizesArray = 9647 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9648 } else { 9649 auto *SizesArrayInit = llvm::ConstantArray::get( 9650 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9651 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9652 auto *SizesArrayGbl = new llvm::GlobalVariable( 9653 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, 9654 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); 9655 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9656 if (RuntimeSizes.any()) { 9657 QualType SizeArrayType = Ctx.getConstantArrayType( 9658 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9659 /*IndexTypeQuals=*/0); 9660 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); 9661 llvm::Value *GblConstPtr = 9662 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9663 SizesArrayGbl, CGM.Int64Ty->getPointerTo()); 9664 CGF.Builder.CreateMemCpy( 9665 Buffer, 9666 Address(GblConstPtr, CGM.Int64Ty, 
9667 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( 9668 /*DestWidth=*/64, /*Signed=*/false))), 9669 CGF.getTypeSize(SizeArrayType)); 9670 Info.SizesArray = Buffer.getPointer(); 9671 } else { 9672 Info.SizesArray = SizesArrayGbl; 9673 } 9674 } 9675 9676 // The map types are always constant so we don't need to generate code to 9677 // fill arrays. Instead, we create an array constant. 9678 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9679 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9680 std::string MaptypesName = 9681 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9682 auto *MapTypesArrayGbl = 9683 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9684 Info.MapTypesArray = MapTypesArrayGbl; 9685 9686 // The information types are only built if there is debug information 9687 // requested. 9688 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9689 Info.MapNamesArray = llvm::Constant::getNullValue( 9690 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9691 } else { 9692 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9693 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9694 }; 9695 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9696 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9697 std::string MapnamesName = 9698 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9699 auto *MapNamesArrayGbl = 9700 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9701 Info.MapNamesArray = MapNamesArrayGbl; 9702 } 9703 9704 // If there's a present map type modifier, it must not be applied to the end 9705 // of a region, so generate a separate map type array in that case. 
9706 if (Info.separateBeginEndCalls()) { 9707 bool EndMapTypesDiffer = false; 9708 for (uint64_t &Type : Mapping) { 9709 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9710 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9711 EndMapTypesDiffer = true; 9712 } 9713 } 9714 if (EndMapTypesDiffer) { 9715 MapTypesArrayGbl = 9716 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9717 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9718 } 9719 } 9720 9721 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9722 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9723 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9724 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9725 Info.BasePointersArray, 0, I); 9726 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9727 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9728 Address BPAddr = 9729 Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9730 CGF.Builder.CreateStore(BPVal, BPAddr); 9731 9732 if (Info.requiresDevicePointerInfo()) 9733 if (const ValueDecl *DevVD = 9734 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9735 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9736 9737 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9738 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9739 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9740 Info.PointersArray, 0, I); 9741 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9742 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9743 Address PAddr = 9744 Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9745 CGF.Builder.CreateStore(PVal, PAddr); 9746 9747 if (RuntimeSizes.test(I)) { 9748 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9749 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9750 Info.SizesArray, 9751 /*Idx0=*/0, 9752 /*Idx1=*/I); 9753 Address SAddr = 9754 Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty)); 9755 
CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9756 CGM.Int64Ty, 9757 /*isSigned=*/true), 9758 SAddr); 9759 } 9760 9761 // Fill up the mapper array. 9762 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9763 if (CombinedInfo.Mappers[I]) { 9764 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9765 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9766 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9767 Info.HasMapper = true; 9768 } 9769 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9770 CGF.Builder.CreateStore(MFunc, MAddr); 9771 } 9772 } 9773 9774 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9775 Info.NumberOfPtrs == 0) 9776 return; 9777 9778 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9779 } 9780 9781 namespace { 9782 /// Additional arguments for emitOffloadingArraysArgument function. 9783 struct ArgumentsOptions { 9784 bool ForEndCall = false; 9785 ArgumentsOptions() = default; 9786 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9787 }; 9788 } // namespace 9789 9790 /// Emit the arguments to be passed to the runtime library based on the 9791 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9792 /// ForEndCall, emit map types to be passed for the end of the region instead of 9793 /// the beginning. 
9794 static void emitOffloadingArraysArgument( 9795 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9796 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9797 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9798 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9799 const ArgumentsOptions &Options = ArgumentsOptions()) { 9800 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9801 "expected region end call to runtime only when end call is separate"); 9802 CodeGenModule &CGM = CGF.CGM; 9803 if (Info.NumberOfPtrs) { 9804 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9805 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9806 Info.BasePointersArray, 9807 /*Idx0=*/0, /*Idx1=*/0); 9808 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9809 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9810 Info.PointersArray, 9811 /*Idx0=*/0, 9812 /*Idx1=*/0); 9813 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9814 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9815 /*Idx0=*/0, /*Idx1=*/0); 9816 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9817 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9818 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9819 : Info.MapTypesArray, 9820 /*Idx0=*/0, 9821 /*Idx1=*/0); 9822 9823 // Only emit the mapper information arrays if debug information is 9824 // requested. 
9825 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9826 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9827 else 9828 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9829 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9830 Info.MapNamesArray, 9831 /*Idx0=*/0, 9832 /*Idx1=*/0); 9833 // If there is no user-defined mapper, set the mapper array to nullptr to 9834 // avoid an unnecessary data privatization 9835 if (!Info.HasMapper) 9836 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9837 else 9838 MappersArrayArg = 9839 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9840 } else { 9841 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9842 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9843 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9844 MapTypesArrayArg = 9845 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9846 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9847 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9848 } 9849 } 9850 9851 /// Check for inner distribute directive. 
9852 static const OMPExecutableDirective * 9853 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9854 const auto *CS = D.getInnermostCapturedStmt(); 9855 const auto *Body = 9856 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9857 const Stmt *ChildStmt = 9858 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9859 9860 if (const auto *NestedDir = 9861 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9862 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9863 switch (D.getDirectiveKind()) { 9864 case OMPD_target: 9865 if (isOpenMPDistributeDirective(DKind)) 9866 return NestedDir; 9867 if (DKind == OMPD_teams) { 9868 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9869 /*IgnoreCaptured=*/true); 9870 if (!Body) 9871 return nullptr; 9872 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9873 if (const auto *NND = 9874 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9875 DKind = NND->getDirectiveKind(); 9876 if (isOpenMPDistributeDirective(DKind)) 9877 return NND; 9878 } 9879 } 9880 return nullptr; 9881 case OMPD_target_teams: 9882 if (isOpenMPDistributeDirective(DKind)) 9883 return NestedDir; 9884 return nullptr; 9885 case OMPD_target_parallel: 9886 case OMPD_target_simd: 9887 case OMPD_target_parallel_for: 9888 case OMPD_target_parallel_for_simd: 9889 return nullptr; 9890 case OMPD_target_teams_distribute: 9891 case OMPD_target_teams_distribute_simd: 9892 case OMPD_target_teams_distribute_parallel_for: 9893 case OMPD_target_teams_distribute_parallel_for_simd: 9894 case OMPD_parallel: 9895 case OMPD_for: 9896 case OMPD_parallel_for: 9897 case OMPD_parallel_master: 9898 case OMPD_parallel_sections: 9899 case OMPD_for_simd: 9900 case OMPD_parallel_for_simd: 9901 case OMPD_cancel: 9902 case OMPD_cancellation_point: 9903 case OMPD_ordered: 9904 case OMPD_threadprivate: 9905 case OMPD_allocate: 9906 case OMPD_task: 9907 case OMPD_simd: 9908 case OMPD_tile: 9909 
case OMPD_unroll: 9910 case OMPD_sections: 9911 case OMPD_section: 9912 case OMPD_single: 9913 case OMPD_master: 9914 case OMPD_critical: 9915 case OMPD_taskyield: 9916 case OMPD_barrier: 9917 case OMPD_taskwait: 9918 case OMPD_taskgroup: 9919 case OMPD_atomic: 9920 case OMPD_flush: 9921 case OMPD_depobj: 9922 case OMPD_scan: 9923 case OMPD_teams: 9924 case OMPD_target_data: 9925 case OMPD_target_exit_data: 9926 case OMPD_target_enter_data: 9927 case OMPD_distribute: 9928 case OMPD_distribute_simd: 9929 case OMPD_distribute_parallel_for: 9930 case OMPD_distribute_parallel_for_simd: 9931 case OMPD_teams_distribute: 9932 case OMPD_teams_distribute_simd: 9933 case OMPD_teams_distribute_parallel_for: 9934 case OMPD_teams_distribute_parallel_for_simd: 9935 case OMPD_target_update: 9936 case OMPD_declare_simd: 9937 case OMPD_declare_variant: 9938 case OMPD_begin_declare_variant: 9939 case OMPD_end_declare_variant: 9940 case OMPD_declare_target: 9941 case OMPD_end_declare_target: 9942 case OMPD_declare_reduction: 9943 case OMPD_declare_mapper: 9944 case OMPD_taskloop: 9945 case OMPD_taskloop_simd: 9946 case OMPD_master_taskloop: 9947 case OMPD_master_taskloop_simd: 9948 case OMPD_parallel_master_taskloop: 9949 case OMPD_parallel_master_taskloop_simd: 9950 case OMPD_requires: 9951 case OMPD_metadirective: 9952 case OMPD_unknown: 9953 default: 9954 llvm_unreachable("Unexpected directive."); 9955 } 9956 } 9957 9958 return nullptr; 9959 } 9960 9961 /// Emit the user-defined mapper function. The code generation follows the 9962 /// pattern in the example below. 9963 /// \code 9964 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9965 /// void *base, void *begin, 9966 /// int64_t size, int64_t type, 9967 /// void *name = nullptr) { 9968 /// // Allocate space for an array section first or add a base/begin for 9969 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper that is already recorded in UDMMap is never emitted twice.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable named in the mapper declaration; it is privatized below so
  // that it designates the array element currently being mapped.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  // The six implicit parameters are, in order: runtime handle, base, begin,
  // size, map type and name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is assembled from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // The mapper function is not left marked 'optnone'.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the element range the loop below walks over.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty element range skips the loop and the deletion step entirely.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Block from which the loop is first entered; first incoming edge of the
  // element-pointer PHI.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ends one iteration; it supplies the back
  // edge of the PHI and is updated for every mapped component below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // NOTE(review): the shift amount comes from getFlagMemberOffset(), so this
  // presumably moves the count into the MEMBER_OF bit field - confirm.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // A name string is only produced when debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    // The shifted count of pre-existing components is added to the
    // component's static map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    // Only the TO/FROM bits of the incoming map type drive the dispatch.
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    // Not 'from' either means 'tofrom': go straight to the merge block.
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // NOTE(review): no explicit branch from FromBB is created here;
    // presumably EmitBlock links the fall-through into EndBB - confirm.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four alternatives; the ToElseBB edge is the 'tofrom' case and
    // carries the unmodified member map type.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // This array deliberately shadows the one-element array declared before
    // the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Back edge of the loop: comes from the last block emitted for the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function and, when a requesting function is known,
  // remember the mapper declaration under that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // IsInit selects the suffix used for the emitted block/value names and, in
  // the branches below, which polarity of the OMP_MAP_DELETE bit enables the
  // body.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Note: the value name below says "arrayinit" for both the init and the
  // delete variant.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init path: run the body when this is an array section, or when
    // Base != Begin and OMP_MAP_PTR_AND_OBJ is set in MapType -- and, in
    // either case, only if the delete bit is clear.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Delete path: run the body only for array sections whose map type has
    // the delete bit set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  // When the condition is false, control goes straight to the caller-supplied
  // ExitBB and the runtime call below is skipped.
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // The entry pushed to the runtime is additionally flagged as implicit.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded in UDMMap for \p D. If there is no
/// entry yet, emitUserDefinedMapper(D) is called first and the map is queried
/// again.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  // lookup() yields a null pointer if emitUserDefinedMapper did not add an
  // entry for D.
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount_mapper for the loop directive
/// associated with the target directive \p D, if there is one and if
/// \p SizeEmitter produces a value for it.
///
/// \param CGF         Function into which the call is emitted.
/// \param D           The target directive being lowered.
/// \param DeviceID    Device id value forwarded to the runtime call.
/// \param SizeEmitter Callback that emits the iteration count for a loop
///                    directive; a null result suppresses the runtime call.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // D itself is used only when its kind is both a distribute and a teams
  // directive; otherwise a nested directive is searched for.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of \p D.
///
/// The captured variables are generated first. If \p OutlinedFnID is
/// non-null, the offloading arrays are built and one of the __tgt_target*
/// runtime entry points is called; a non-zero result from that call branches
/// to a block that runs the host fallback. If \p OutlinedFnID is null, or the
/// \p IfCond expression selects the else branch, only the host fallback is
/// emitted. The host fallback calls \p OutlinedFn, or emits `unreachable`
/// when offloading is mandatory.
///
/// \param CGF          Function into which the code is emitted.
/// \param D            The target directive.
/// \param OutlinedFn   Host version of the target region; may be null only
///                     when offloading is mandatory (see the assertion).
/// \param OutlinedFnID Identifier of the target region passed to the runtime;
///                     null means "do not offload".
/// \param IfCond       Expression of the `if` clause, or null.
/// \param Device       Device clause expression together with its modifier.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Mandatory offloading only applies when compiling for the host.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // A depend or nowait clause makes the region go through
  // EmitOMPTargetTaskBasedDirective instead of being emitted inline.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo, MapTypesArray and MapNamesArray are filled in by TargetThenGen
  // and read by ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // NOTE(review): the captured variables are regenerated with the CGF
        // passed to this lambda, presumably because that is the task-based
        // region's function -- confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the OMP_DEVICEID_UNDEF constant.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // nowait selects the *_nowait_mapper runtime entry points, which take
    // four extra dependence arguments (appended below).
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams information: same argument list without NumTeams/NumThreads,
      // passed to the non-teams entry points.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // Any non-zero return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  // Builds the map information for every capture, materializes the offloading
  // arrays, publishes them through InputInfo/MapTypesArray/MapNamesArray and
  // then runs ThenGen (inside a task-based region when required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures (CI), the captured record's fields (RI) and the
    // generated captured values (CV) in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A capture of 'this' is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambda captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the arrays for ThenGen; each is wrapped in an Address with
    // pointer alignment.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: runs ElseGen, inside a task-based region when required.
  // It uses its own, default-constructed OMPTargetDataInfo.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively search \p S for target execution directives and emit the
/// device function for each one that has a target region entry registered
/// under \p ParentName.
///
/// \param S          Statement to scan; a null statement is ignored.
/// \param ParentName Name passed to the entry lookup and to the
///                   EmitOMPTarget*DeviceFunction helpers.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the emitter matching the directive kind. Kinds that are not
    // target execution directives are listed explicitly and are unreachable
    // here because of the RequiresDeviceCodegen check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Any other executable directive: continue into its raw associated
  // statement, if it has one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Return true if the declare target device type of \p VD excludes it from
/// the current compilation: `nohost` when compiling for the host, or `host`
/// when compiling for the device. Returns false when \p VD has no device type.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

/// Handle the function-like global \p GD for target code generation.
///
/// On the host the result is true only for functions excluded by their device
/// type. On the device, function bodies are first scanned for target regions;
/// the result is then true if the function is excluded by its device type, or
/// if it is neither a declare target declaration nor present in
/// AlreadyEmittedTargetDecls. Judging by the comments in the body, a true
/// result tells the caller not to emit the declaration.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Handle the global variable \p GD for target code generation.
///
/// Returns true if the variable is excluded by its device type. On the host
/// the result is otherwise false. On the device, the constructors and the
/// destructor of the variable's record type (if any) are scanned for target
/// regions; variables that are not declare target, are `link`, or are `to`
/// under unified shared memory are added to DeferredGlobalVariables and true
/// is returned.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Register the global variable \p VD, whose address is \p Addr, with the
/// offload entries manager.
///
/// Nothing is registered when no offloading targets are configured on the
/// host, or when the variable has a device type other than `any`. Variables
/// that are not declare target are only recorded in EmittedNonTargetVariables
/// (device compilation only).
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' without unified shared memory: the entry describes the variable
    // itself (mangled name, type size, definition linkage).
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: register with a zero size.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Create an internal constant global initialized with the variable's
        // address and add it to the compiler-used list.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to' with unified shared memory: the entry is pointer-sized,
    // has weak linkage and is named after the value returned by
    // getAddrOfDeclareTargetVar (host) or after Addr (device).
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is registered by name only (null address).
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatch \p GD to emitTargetFunctions when it is a function or a declare
/// reduction declaration, and to emitTargetGlobalVariable otherwise; the
/// callee's result is returned unchanged.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Process every variable recorded in DeferredGlobalVariables: `to` variables
/// (without unified shared memory) are emitted through CGM.EmitGlobal; `link`
/// variables and `to` variables under unified shared memory only get
/// getAddrOfDeclareTargetVar called for them. Variables that are not declare
/// target are skipped.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// Base implementation: performs no adjustment. It only asserts that \p D is
/// a target execution directive.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effects of the clauses of the `requires` directive \p D:
/// `unified_shared_memory` sets HasRequiresUnifiedSharedMemory, and
/// `atomic_default_mem_order` sets RequiresAtomicOrdering. Other clauses are
/// ignored here.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Leave the current ordering unchanged.
        break;
      }
    }
  }
}

/// Return the atomic ordering stored in RequiresAtomicOrdering (updated by
/// processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// Check whether \p VD carries an OMPAllocateDeclAttr and, if so, report the
/// address space to use for it.
///
/// \param VD Variable to inspect; may be null.
/// \param AS Set to LangAS::Default for every predefined allocator; left
///           untouched when false is returned.
/// \returns false if \p VD is null or has no allocate attribute, true for any
///          predefined allocator. A user-defined allocator is unreachable.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// Return whether a `requires unified_shared_memory` clause has been recorded.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// When compiling for the device, save the runtime's ShouldMarkAsGlobal flag
/// and clear it for the lifetime of this object. Does nothing on the host.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// Restore the ShouldMarkAsGlobal value saved by the constructor (device
/// compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
11100 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 11101 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 11102 if (auto *F = dyn_cast_or_null<llvm::Function>( 11103 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 11104 return !F->isDeclaration(); 11105 return false; 11106 } 11107 return true; 11108 } 11109 11110 return !AlreadyEmittedTargetDecls.insert(D).second; 11111 } 11112 11113 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 11114 // If we don't have entries or if we are emitting code for the device, we 11115 // don't need to do anything. 11116 if (CGM.getLangOpts().OMPTargetTriples.empty() || 11117 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 11118 (OffloadEntriesInfoManager.empty() && 11119 !HasEmittedDeclareTargetRegion && 11120 !HasEmittedTargetRegion)) 11121 return nullptr; 11122 11123 // Create and register the function that handles the requires directives. 11124 ASTContext &C = CGM.getContext(); 11125 11126 llvm::Function *RequiresRegFn; 11127 { 11128 CodeGenFunction CGF(CGM); 11129 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 11130 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 11131 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 11132 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 11133 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 11134 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 11135 // TODO: check for other requires clauses. 11136 // The requires directive takes effect only when a target region is 11137 // present in the compilation unit. Otherwise it is ignored and not 11138 // passed to the runtime. This avoids the runtime from throwing an error 11139 // for mismatching requires clauses across compilation units that don't 11140 // contain at least 1 target region. 
11141 assert((HasEmittedTargetRegion || 11142 HasEmittedDeclareTargetRegion || 11143 !OffloadEntriesInfoManager.empty()) && 11144 "Target or declare target region expected."); 11145 if (HasRequiresUnifiedSharedMemory) 11146 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 11147 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11148 CGM.getModule(), OMPRTL___tgt_register_requires), 11149 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 11150 CGF.FinishFunction(); 11151 } 11152 return RequiresRegFn; 11153 } 11154 11155 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 11156 const OMPExecutableDirective &D, 11157 SourceLocation Loc, 11158 llvm::Function *OutlinedFn, 11159 ArrayRef<llvm::Value *> CapturedVars) { 11160 if (!CGF.HaveInsertPoint()) 11161 return; 11162 11163 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11164 CodeGenFunction::RunCleanupsScope Scope(CGF); 11165 11166 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 11167 llvm::Value *Args[] = { 11168 RTLoc, 11169 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 11170 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 11171 llvm::SmallVector<llvm::Value *, 16> RealArgs; 11172 RealArgs.append(std::begin(Args), std::end(Args)); 11173 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 11174 11175 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11176 CGM.getModule(), OMPRTL___kmpc_fork_teams); 11177 CGF.EmitRuntimeCall(RTLFn, RealArgs); 11178 } 11179 11180 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11181 const Expr *NumTeams, 11182 const Expr *ThreadLimit, 11183 SourceLocation Loc) { 11184 if (!CGF.HaveInsertPoint()) 11185 return; 11186 11187 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 11188 11189 llvm::Value *NumTeamsVal = 11190 NumTeams 11191 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11192 CGF.CGM.Int32Ty, /* isSigned = */ true) 11193 : CGF.Builder.getInt32(0); 11194 11195 llvm::Value *ThreadLimitVal = 11196 ThreadLimit 11197 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11198 CGF.CGM.Int32Ty, /* isSigned = */ true) 11199 : CGF.Builder.getInt32(0); 11200 11201 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11202 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11203 ThreadLimitVal}; 11204 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11205 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11206 PushNumTeamsArgs); 11207 } 11208 11209 void CGOpenMPRuntime::emitTargetDataCalls( 11210 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11211 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11212 if (!CGF.HaveInsertPoint()) 11213 return; 11214 11215 // Action used to replace the default codegen action and turn privatization 11216 // off. 11217 PrePostActionTy NoPrivAction; 11218 11219 // Generate the code for the opening of the data environment. Capture all the 11220 // arguments of the runtime call by reference because they are used in the 11221 // closing of the region. 11222 auto &&BeginThenGen = [this, &D, Device, &Info, 11223 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11224 // Fill up the arrays with all the mapped variables. 11225 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11226 11227 // Get map clause information. 11228 MappableExprsHandler MEHandler(D, CGF); 11229 MEHandler.generateAllInfo(CombinedInfo); 11230 11231 // Fill up the arrays and create the arguments. 
11232 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11233 /*IsNonContiguous=*/true); 11234 11235 llvm::Value *BasePointersArrayArg = nullptr; 11236 llvm::Value *PointersArrayArg = nullptr; 11237 llvm::Value *SizesArrayArg = nullptr; 11238 llvm::Value *MapTypesArrayArg = nullptr; 11239 llvm::Value *MapNamesArrayArg = nullptr; 11240 llvm::Value *MappersArrayArg = nullptr; 11241 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11242 SizesArrayArg, MapTypesArrayArg, 11243 MapNamesArrayArg, MappersArrayArg, Info); 11244 11245 // Emit device ID if any. 11246 llvm::Value *DeviceID = nullptr; 11247 if (Device) { 11248 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11249 CGF.Int64Ty, /*isSigned=*/true); 11250 } else { 11251 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11252 } 11253 11254 // Emit the number of elements in the offloading arrays. 11255 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11256 // 11257 // Source location for the ident struct 11258 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11259 11260 llvm::Value *OffloadingArgs[] = {RTLoc, 11261 DeviceID, 11262 PointerNum, 11263 BasePointersArrayArg, 11264 PointersArrayArg, 11265 SizesArrayArg, 11266 MapTypesArrayArg, 11267 MapNamesArrayArg, 11268 MappersArrayArg}; 11269 CGF.EmitRuntimeCall( 11270 OMPBuilder.getOrCreateRuntimeFunction( 11271 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11272 OffloadingArgs); 11273 11274 // If device pointer privatization is required, emit the body of the region 11275 // here. It will have to be duplicated: with and without privatization. 11276 if (!Info.CaptureDeviceAddrMap.empty()) 11277 CodeGen(CGF); 11278 }; 11279 11280 // Generate code for the closing of the data region. 
11281 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11282 PrePostActionTy &) { 11283 assert(Info.isValid() && "Invalid data environment closing arguments."); 11284 11285 llvm::Value *BasePointersArrayArg = nullptr; 11286 llvm::Value *PointersArrayArg = nullptr; 11287 llvm::Value *SizesArrayArg = nullptr; 11288 llvm::Value *MapTypesArrayArg = nullptr; 11289 llvm::Value *MapNamesArrayArg = nullptr; 11290 llvm::Value *MappersArrayArg = nullptr; 11291 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11292 SizesArrayArg, MapTypesArrayArg, 11293 MapNamesArrayArg, MappersArrayArg, Info, 11294 {/*ForEndCall=*/true}); 11295 11296 // Emit device ID if any. 11297 llvm::Value *DeviceID = nullptr; 11298 if (Device) { 11299 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11300 CGF.Int64Ty, /*isSigned=*/true); 11301 } else { 11302 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11303 } 11304 11305 // Emit the number of elements in the offloading arrays. 11306 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11307 11308 // Source location for the ident struct 11309 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11310 11311 llvm::Value *OffloadingArgs[] = {RTLoc, 11312 DeviceID, 11313 PointerNum, 11314 BasePointersArrayArg, 11315 PointersArrayArg, 11316 SizesArrayArg, 11317 MapTypesArrayArg, 11318 MapNamesArrayArg, 11319 MappersArrayArg}; 11320 CGF.EmitRuntimeCall( 11321 OMPBuilder.getOrCreateRuntimeFunction( 11322 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11323 OffloadingArgs); 11324 }; 11325 11326 // If we need device pointer privatization, we need to emit the body of the 11327 // region with no privatization in the 'else' branch of the conditional. 11328 // Otherwise, we don't have to do anything. 
11329 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11330 PrePostActionTy &) { 11331 if (!Info.CaptureDeviceAddrMap.empty()) { 11332 CodeGen.setAction(NoPrivAction); 11333 CodeGen(CGF); 11334 } 11335 }; 11336 11337 // We don't have to do anything to close the region if the if clause evaluates 11338 // to false. 11339 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11340 11341 if (IfCond) { 11342 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11343 } else { 11344 RegionCodeGenTy RCG(BeginThenGen); 11345 RCG(CGF); 11346 } 11347 11348 // If we don't require privatization of device pointers, we emit the body in 11349 // between the runtime calls. This avoids duplicating the body code. 11350 if (Info.CaptureDeviceAddrMap.empty()) { 11351 CodeGen.setAction(NoPrivAction); 11352 CodeGen(CGF); 11353 } 11354 11355 if (IfCond) { 11356 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11357 } else { 11358 RegionCodeGenTy RCG(EndThenGen); 11359 RCG(CGF); 11360 } 11361 } 11362 11363 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11364 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11365 const Expr *Device) { 11366 if (!CGF.HaveInsertPoint()) 11367 return; 11368 11369 assert((isa<OMPTargetEnterDataDirective>(D) || 11370 isa<OMPTargetExitDataDirective>(D) || 11371 isa<OMPTargetUpdateDirective>(D)) && 11372 "Expecting either target enter, exit data, or update directives."); 11373 11374 CodeGenFunction::OMPTargetDataInfo InputInfo; 11375 llvm::Value *MapTypesArray = nullptr; 11376 llvm::Value *MapNamesArray = nullptr; 11377 // Generate the code for the opening of the data environment. 11378 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11379 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11380 // Emit device ID if any. 
11381 llvm::Value *DeviceID = nullptr; 11382 if (Device) { 11383 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11384 CGF.Int64Ty, /*isSigned=*/true); 11385 } else { 11386 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11387 } 11388 11389 // Emit the number of elements in the offloading arrays. 11390 llvm::Constant *PointerNum = 11391 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11392 11393 // Source location for the ident struct 11394 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11395 11396 llvm::Value *OffloadingArgs[] = {RTLoc, 11397 DeviceID, 11398 PointerNum, 11399 InputInfo.BasePointersArray.getPointer(), 11400 InputInfo.PointersArray.getPointer(), 11401 InputInfo.SizesArray.getPointer(), 11402 MapTypesArray, 11403 MapNamesArray, 11404 InputInfo.MappersArray.getPointer()}; 11405 11406 // Select the right runtime function call for each standalone 11407 // directive. 11408 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11409 RuntimeFunction RTLFn; 11410 switch (D.getDirectiveKind()) { 11411 case OMPD_target_enter_data: 11412 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11413 : OMPRTL___tgt_target_data_begin_mapper; 11414 break; 11415 case OMPD_target_exit_data: 11416 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11417 : OMPRTL___tgt_target_data_end_mapper; 11418 break; 11419 case OMPD_target_update: 11420 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11421 : OMPRTL___tgt_target_data_update_mapper; 11422 break; 11423 case OMPD_parallel: 11424 case OMPD_for: 11425 case OMPD_parallel_for: 11426 case OMPD_parallel_master: 11427 case OMPD_parallel_sections: 11428 case OMPD_for_simd: 11429 case OMPD_parallel_for_simd: 11430 case OMPD_cancel: 11431 case OMPD_cancellation_point: 11432 case OMPD_ordered: 11433 case OMPD_threadprivate: 11434 case OMPD_allocate: 11435 case OMPD_task: 11436 case OMPD_simd: 11437 case OMPD_tile: 11438 case OMPD_unroll: 11439 case OMPD_sections: 11440 case OMPD_section: 11441 case OMPD_single: 11442 case OMPD_master: 11443 case OMPD_critical: 11444 case OMPD_taskyield: 11445 case OMPD_barrier: 11446 case OMPD_taskwait: 11447 case OMPD_taskgroup: 11448 case OMPD_atomic: 11449 case OMPD_flush: 11450 case OMPD_depobj: 11451 case OMPD_scan: 11452 case OMPD_teams: 11453 case OMPD_target_data: 11454 case OMPD_distribute: 11455 case OMPD_distribute_simd: 11456 case OMPD_distribute_parallel_for: 11457 case OMPD_distribute_parallel_for_simd: 11458 case OMPD_teams_distribute: 11459 case OMPD_teams_distribute_simd: 11460 case OMPD_teams_distribute_parallel_for: 11461 case OMPD_teams_distribute_parallel_for_simd: 11462 case OMPD_declare_simd: 11463 case OMPD_declare_variant: 11464 case OMPD_begin_declare_variant: 11465 case OMPD_end_declare_variant: 11466 case OMPD_declare_target: 11467 case OMPD_end_declare_target: 11468 case OMPD_declare_reduction: 11469 case OMPD_declare_mapper: 11470 case OMPD_taskloop: 11471 case OMPD_taskloop_simd: 11472 case OMPD_master_taskloop: 11473 case OMPD_master_taskloop_simd: 11474 case OMPD_parallel_master_taskloop: 11475 case OMPD_parallel_master_taskloop_simd: 11476 case OMPD_target: 11477 case OMPD_target_simd: 11478 case OMPD_target_teams_distribute: 11479 case OMPD_target_teams_distribute_simd: 11480 case OMPD_target_teams_distribute_parallel_for: 11481 case OMPD_target_teams_distribute_parallel_for_simd: 11482 case 
OMPD_target_teams: 11483 case OMPD_target_parallel: 11484 case OMPD_target_parallel_for: 11485 case OMPD_target_parallel_for_simd: 11486 case OMPD_requires: 11487 case OMPD_metadirective: 11488 case OMPD_unknown: 11489 default: 11490 llvm_unreachable("Unexpected standalone target data directive."); 11491 break; 11492 } 11493 CGF.EmitRuntimeCall( 11494 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11495 OffloadingArgs); 11496 }; 11497 11498 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11499 &MapNamesArray](CodeGenFunction &CGF, 11500 PrePostActionTy &) { 11501 // Fill up the arrays with all the mapped variables. 11502 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11503 11504 // Get map clause information. 11505 MappableExprsHandler MEHandler(D, CGF); 11506 MEHandler.generateAllInfo(CombinedInfo); 11507 11508 TargetDataInfo Info; 11509 // Fill up the arrays and create the arguments. 11510 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11511 /*IsNonContiguous=*/true); 11512 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11513 D.hasClausesOfKind<OMPNowaitClause>(); 11514 emitOffloadingArraysArgument( 11515 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11516 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11517 {/*ForEndCall=*/false}); 11518 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11519 InputInfo.BasePointersArray = 11520 Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign()); 11521 InputInfo.PointersArray = 11522 Address::deprecated(Info.PointersArray, CGM.getPointerAlign()); 11523 InputInfo.SizesArray = 11524 Address::deprecated(Info.SizesArray, CGM.getPointerAlign()); 11525 InputInfo.MappersArray = 11526 Address::deprecated(Info.MappersArray, CGM.getPointerAlign()); 11527 MapTypesArray = Info.MapTypesArray; 11528 MapNamesArray = Info.MapNamesArray; 11529 if (RequiresOuterTask) 11530 CGF.EmitOMPTargetTaskBasedDirective(D, 
ThenGen, InputInfo); 11531 else 11532 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11533 }; 11534 11535 if (IfCond) { 11536 emitIfClause(CGF, IfCond, TargetThenGen, 11537 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11538 } else { 11539 RegionCodeGenTy ThenRCG(TargetThenGen); 11540 ThenRCG(CGF); 11541 } 11542 } 11543 11544 namespace { 11545 /// Kind of parameter in a function with 'declare simd' directive. 11546 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11547 /// Attribute set of the parameter. 11548 struct ParamAttrTy { 11549 ParamKindTy Kind = Vector; 11550 llvm::APSInt StrideOrArg; 11551 llvm::APSInt Alignment; 11552 }; 11553 } // namespace 11554 11555 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11556 ArrayRef<ParamAttrTy> ParamAttrs) { 11557 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11558 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11559 // of that clause. The VLEN value must be power of 2. 11560 // In other case the notion of the function`s "characteristic data type" (CDT) 11561 // is used to compute the vector length. 11562 // CDT is defined in the following order: 11563 // a) For non-void function, the CDT is the return type. 11564 // b) If the function has any non-uniform, non-linear parameters, then the 11565 // CDT is the type of the first such parameter. 11566 // c) If the CDT determined by a) or b) above is struct, union, or class 11567 // type which is pass-by-value (except for the type that maps to the 11568 // built-in complex data type), the characteristic data type is int. 11569 // d) If none of the above three cases is applicable, the CDT is int. 11570 // The VLEN is then determined based on the CDT and the size of vector 11571 // register of that ISA for which current vector version is generated. 
The 11572 // VLEN is computed using the formula below: 11573 // VLEN = sizeof(vector_register) / sizeof(CDT), 11574 // where vector register size specified in section 3.2.1 Registers and the 11575 // Stack Frame of original AMD64 ABI document. 11576 QualType RetType = FD->getReturnType(); 11577 if (RetType.isNull()) 11578 return 0; 11579 ASTContext &C = FD->getASTContext(); 11580 QualType CDT; 11581 if (!RetType.isNull() && !RetType->isVoidType()) { 11582 CDT = RetType; 11583 } else { 11584 unsigned Offset = 0; 11585 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11586 if (ParamAttrs[Offset].Kind == Vector) 11587 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11588 ++Offset; 11589 } 11590 if (CDT.isNull()) { 11591 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11592 if (ParamAttrs[I + Offset].Kind == Vector) { 11593 CDT = FD->getParamDecl(I)->getType(); 11594 break; 11595 } 11596 } 11597 } 11598 } 11599 if (CDT.isNull()) 11600 CDT = C.IntTy; 11601 CDT = CDT->getCanonicalTypeUnqualified(); 11602 if (CDT->isRecordType() || CDT->isUnionType()) 11603 CDT = C.IntTy; 11604 return C.getTypeSize(CDT); 11605 } 11606 11607 static void 11608 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11609 const llvm::APSInt &VLENVal, 11610 ArrayRef<ParamAttrTy> ParamAttrs, 11611 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11612 struct ISADataTy { 11613 char ISA; 11614 unsigned VecRegSize; 11615 }; 11616 ISADataTy ISAData[] = { 11617 { 11618 'b', 128 11619 }, // SSE 11620 { 11621 'c', 256 11622 }, // AVX 11623 { 11624 'd', 256 11625 }, // AVX2 11626 { 11627 'e', 512 11628 }, // AVX512 11629 }; 11630 llvm::SmallVector<char, 2> Masked; 11631 switch (State) { 11632 case OMPDeclareSimdDeclAttr::BS_Undefined: 11633 Masked.push_back('N'); 11634 Masked.push_back('M'); 11635 break; 11636 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11637 Masked.push_back('N'); 11638 break; 11639 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11640 
Masked.push_back('M'); 11641 break; 11642 } 11643 for (char Mask : Masked) { 11644 for (const ISADataTy &Data : ISAData) { 11645 SmallString<256> Buffer; 11646 llvm::raw_svector_ostream Out(Buffer); 11647 Out << "_ZGV" << Data.ISA << Mask; 11648 if (!VLENVal) { 11649 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11650 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11651 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11652 } else { 11653 Out << VLENVal; 11654 } 11655 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11656 switch (ParamAttr.Kind){ 11657 case LinearWithVarStride: 11658 Out << 's' << ParamAttr.StrideOrArg; 11659 break; 11660 case Linear: 11661 Out << 'l'; 11662 if (ParamAttr.StrideOrArg != 1) 11663 Out << ParamAttr.StrideOrArg; 11664 break; 11665 case Uniform: 11666 Out << 'u'; 11667 break; 11668 case Vector: 11669 Out << 'v'; 11670 break; 11671 } 11672 if (!!ParamAttr.Alignment) 11673 Out << 'a' << ParamAttr.Alignment; 11674 } 11675 Out << '_' << Fn->getName(); 11676 Fn->addFnAttr(Out.str()); 11677 } 11678 } 11679 } 11680 11681 // This are the Functions that are needed to mangle the name of the 11682 // vector functions generated by the compiler, according to the rules 11683 // defined in the "Vector Function ABI specifications for AArch64", 11684 // available at 11685 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11686 11687 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11688 /// 11689 /// TODO: Need to implement the behavior for reference marked with a 11690 /// var or no linear modifiers (1.b in the section). For this, we 11691 /// need to extend ParamKindTy to support the linear modifiers. 
11692 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11693 QT = QT.getCanonicalType(); 11694 11695 if (QT->isVoidType()) 11696 return false; 11697 11698 if (Kind == ParamKindTy::Uniform) 11699 return false; 11700 11701 if (Kind == ParamKindTy::Linear) 11702 return false; 11703 11704 // TODO: Handle linear references with modifiers 11705 11706 if (Kind == ParamKindTy::LinearWithVarStride) 11707 return false; 11708 11709 return true; 11710 } 11711 11712 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11713 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11714 QT = QT.getCanonicalType(); 11715 unsigned Size = C.getTypeSize(QT); 11716 11717 // Only scalars and complex within 16 bytes wide set PVB to true. 11718 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11719 return false; 11720 11721 if (QT->isFloatingType()) 11722 return true; 11723 11724 if (QT->isIntegerType()) 11725 return true; 11726 11727 if (QT->isPointerType()) 11728 return true; 11729 11730 // TODO: Add support for complex types (section 3.1.2, item 2). 11731 11732 return false; 11733 } 11734 11735 /// Computes the lane size (LS) of a return type or of an input parameter, 11736 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11737 /// TODO: Add support for references, section 3.2.1, item 1. 11738 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11739 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11740 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11741 if (getAArch64PBV(PTy, C)) 11742 return C.getTypeSize(PTy); 11743 } 11744 if (getAArch64PBV(QT, C)) 11745 return C.getTypeSize(QT); 11746 11747 return C.getTypeSize(C.getUIntPtrType()); 11748 } 11749 11750 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11751 // signature of the scalar function, as defined in 3.2.2 of the 11752 // AAVFABI. 
11753 static std::tuple<unsigned, unsigned, bool> 11754 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11755 QualType RetType = FD->getReturnType().getCanonicalType(); 11756 11757 ASTContext &C = FD->getASTContext(); 11758 11759 bool OutputBecomesInput = false; 11760 11761 llvm::SmallVector<unsigned, 8> Sizes; 11762 if (!RetType->isVoidType()) { 11763 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11764 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11765 OutputBecomesInput = true; 11766 } 11767 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11768 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11769 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11770 } 11771 11772 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11773 // The LS of a function parameter / return value can only be a power 11774 // of 2, starting from 8 bits, up to 128. 11775 assert(llvm::all_of(Sizes, 11776 [](unsigned Size) { 11777 return Size == 8 || Size == 16 || Size == 32 || 11778 Size == 64 || Size == 128; 11779 }) && 11780 "Invalid size"); 11781 11782 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11783 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11784 OutputBecomesInput); 11785 } 11786 11787 /// Mangle the parameter part of the vector function name according to 11788 /// their OpenMP classification. The mangling function is defined in 11789 /// section 3.5 of the AAVFABI. 11790 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11791 SmallString<256> Buffer; 11792 llvm::raw_svector_ostream Out(Buffer); 11793 for (const auto &ParamAttr : ParamAttrs) { 11794 switch (ParamAttr.Kind) { 11795 case LinearWithVarStride: 11796 Out << "ls" << ParamAttr.StrideOrArg; 11797 break; 11798 case Linear: 11799 Out << 'l'; 11800 // Don't print the step value if it is not present or if it is 11801 // equal to 1. 
11802 if (ParamAttr.StrideOrArg != 1) 11803 Out << ParamAttr.StrideOrArg; 11804 break; 11805 case Uniform: 11806 Out << 'u'; 11807 break; 11808 case Vector: 11809 Out << 'v'; 11810 break; 11811 } 11812 11813 if (!!ParamAttr.Alignment) 11814 Out << 'a' << ParamAttr.Alignment; 11815 } 11816 11817 return std::string(Out.str()); 11818 } 11819 11820 // Function used to add the attribute. The parameter `VLEN` is 11821 // templated to allow the use of "x" when targeting scalable functions 11822 // for SVE. 11823 template <typename T> 11824 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11825 char ISA, StringRef ParSeq, 11826 StringRef MangledName, bool OutputBecomesInput, 11827 llvm::Function *Fn) { 11828 SmallString<256> Buffer; 11829 llvm::raw_svector_ostream Out(Buffer); 11830 Out << Prefix << ISA << LMask << VLEN; 11831 if (OutputBecomesInput) 11832 Out << "v"; 11833 Out << ParSeq << "_" << MangledName; 11834 Fn->addFnAttr(Out.str()); 11835 } 11836 11837 // Helper function to generate the Advanced SIMD names depending on 11838 // the value of the NDS when simdlen is not present. 
11839 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11840 StringRef Prefix, char ISA, 11841 StringRef ParSeq, StringRef MangledName, 11842 bool OutputBecomesInput, 11843 llvm::Function *Fn) { 11844 switch (NDS) { 11845 case 8: 11846 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11847 OutputBecomesInput, Fn); 11848 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11849 OutputBecomesInput, Fn); 11850 break; 11851 case 16: 11852 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11853 OutputBecomesInput, Fn); 11854 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11855 OutputBecomesInput, Fn); 11856 break; 11857 case 32: 11858 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11859 OutputBecomesInput, Fn); 11860 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11861 OutputBecomesInput, Fn); 11862 break; 11863 case 64: 11864 case 128: 11865 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11866 OutputBecomesInput, Fn); 11867 break; 11868 default: 11869 llvm_unreachable("Scalar type is too wide."); 11870 } 11871 } 11872 11873 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11874 static void emitAArch64DeclareSimdFunction( 11875 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11876 ArrayRef<ParamAttrTy> ParamAttrs, 11877 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11878 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11879 11880 // Get basic data for building the vector signature. 11881 const auto Data = getNDSWDS(FD, ParamAttrs); 11882 const unsigned NDS = std::get<0>(Data); 11883 const unsigned WDS = std::get<1>(Data); 11884 const bool OutputBecomesInput = std::get<2>(Data); 11885 11886 // Check the values provided via `simdlen` by the user. 11887 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11888 if (UserVLEN == 1) { 11889 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11890 DiagnosticsEngine::Warning, 11891 "The clause simdlen(1) has no effect when targeting aarch64."); 11892 CGM.getDiags().Report(SLoc, DiagID); 11893 return; 11894 } 11895 11896 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11897 // Advanced SIMD output. 11898 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11899 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11900 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11901 "power of 2 when targeting Advanced SIMD."); 11902 CGM.getDiags().Report(SLoc, DiagID); 11903 return; 11904 } 11905 11906 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11907 // limits. 11908 if (ISA == 's' && UserVLEN != 0) { 11909 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11910 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11911 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11912 "lanes in the architectural constraints " 11913 "for SVE (min is 128-bit, max is " 11914 "2048-bit, by steps of 128-bit)"); 11915 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11916 return; 11917 } 11918 } 11919 11920 // Sort out parameter sequence. 11921 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11922 StringRef Prefix = "_ZGV"; 11923 // Generate simdlen from user input (if any). 11924 if (UserVLEN) { 11925 if (ISA == 's') { 11926 // SVE generates only a masked function. 11927 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11928 OutputBecomesInput, Fn); 11929 } else { 11930 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11931 // Advanced SIMD generates one or two functions, depending on 11932 // the `[not]inbranch` clause. 
11933 switch (State) { 11934 case OMPDeclareSimdDeclAttr::BS_Undefined: 11935 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11936 OutputBecomesInput, Fn); 11937 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11938 OutputBecomesInput, Fn); 11939 break; 11940 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11941 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11942 OutputBecomesInput, Fn); 11943 break; 11944 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11945 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11946 OutputBecomesInput, Fn); 11947 break; 11948 } 11949 } 11950 } else { 11951 // If no user simdlen is provided, follow the AAVFABI rules for 11952 // generating the vector length. 11953 if (ISA == 's') { 11954 // SVE, section 3.4.1, item 1. 11955 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11956 OutputBecomesInput, Fn); 11957 } else { 11958 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11959 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11960 // two vector names depending on the use of the clause 11961 // `[not]inbranch`. 
11962 switch (State) { 11963 case OMPDeclareSimdDeclAttr::BS_Undefined: 11964 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11965 OutputBecomesInput, Fn); 11966 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11967 OutputBecomesInput, Fn); 11968 break; 11969 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11970 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11971 OutputBecomesInput, Fn); 11972 break; 11973 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11974 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11975 OutputBecomesInput, Fn); 11976 break; 11977 } 11978 } 11979 } 11980 } 11981 11982 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11983 llvm::Function *Fn) { 11984 ASTContext &C = CGM.getContext(); 11985 FD = FD->getMostRecentDecl(); 11986 // Map params to their positions in function decl. 11987 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11988 if (isa<CXXMethodDecl>(FD)) 11989 ParamPositions.try_emplace(FD, 0); 11990 unsigned ParamPos = ParamPositions.size(); 11991 for (const ParmVarDecl *P : FD->parameters()) { 11992 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11993 ++ParamPos; 11994 } 11995 while (FD) { 11996 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11997 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11998 // Mark uniform parameters. 11999 for (const Expr *E : Attr->uniforms()) { 12000 E = E->IgnoreParenImpCasts(); 12001 unsigned Pos; 12002 if (isa<CXXThisExpr>(E)) { 12003 Pos = ParamPositions[FD]; 12004 } else { 12005 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12006 ->getCanonicalDecl(); 12007 Pos = ParamPositions[PVD]; 12008 } 12009 ParamAttrs[Pos].Kind = Uniform; 12010 } 12011 // Get alignment info. 
12012 auto *NI = Attr->alignments_begin(); 12013 for (const Expr *E : Attr->aligneds()) { 12014 E = E->IgnoreParenImpCasts(); 12015 unsigned Pos; 12016 QualType ParmTy; 12017 if (isa<CXXThisExpr>(E)) { 12018 Pos = ParamPositions[FD]; 12019 ParmTy = E->getType(); 12020 } else { 12021 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12022 ->getCanonicalDecl(); 12023 Pos = ParamPositions[PVD]; 12024 ParmTy = PVD->getType(); 12025 } 12026 ParamAttrs[Pos].Alignment = 12027 (*NI) 12028 ? (*NI)->EvaluateKnownConstInt(C) 12029 : llvm::APSInt::getUnsigned( 12030 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12031 .getQuantity()); 12032 ++NI; 12033 } 12034 // Mark linear parameters. 12035 auto *SI = Attr->steps_begin(); 12036 auto *MI = Attr->modifiers_begin(); 12037 for (const Expr *E : Attr->linears()) { 12038 E = E->IgnoreParenImpCasts(); 12039 unsigned Pos; 12040 // Rescaling factor needed to compute the linear parameter 12041 // value in the mangled name. 12042 unsigned PtrRescalingFactor = 1; 12043 if (isa<CXXThisExpr>(E)) { 12044 Pos = ParamPositions[FD]; 12045 } else { 12046 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12047 ->getCanonicalDecl(); 12048 Pos = ParamPositions[PVD]; 12049 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12050 PtrRescalingFactor = CGM.getContext() 12051 .getTypeSizeInChars(P->getPointeeType()) 12052 .getQuantity(); 12053 } 12054 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12055 ParamAttr.Kind = Linear; 12056 // Assuming a stride of 1, for `linear` without modifiers. 
12057 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12058 if (*SI) { 12059 Expr::EvalResult Result; 12060 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12061 if (const auto *DRE = 12062 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12063 if (const auto *StridePVD = 12064 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 12065 ParamAttr.Kind = LinearWithVarStride; 12066 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12067 ParamPositions[StridePVD->getCanonicalDecl()]); 12068 } 12069 } 12070 } else { 12071 ParamAttr.StrideOrArg = Result.Val.getInt(); 12072 } 12073 } 12074 // If we are using a linear clause on a pointer, we need to 12075 // rescale the value of linear_step with the byte size of the 12076 // pointee type. 12077 if (Linear == ParamAttr.Kind) 12078 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12079 ++SI; 12080 ++MI; 12081 } 12082 llvm::APSInt VLENVal; 12083 SourceLocation ExprLoc; 12084 const Expr *VLENExpr = Attr->getSimdlen(); 12085 if (VLENExpr) { 12086 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12087 ExprLoc = VLENExpr->getExprLoc(); 12088 } 12089 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12090 if (CGM.getTriple().isX86()) { 12091 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12092 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12093 unsigned VLEN = VLENVal.getExtValue(); 12094 StringRef MangledName = Fn->getName(); 12095 if (CGM.getTarget().hasFeature("sve")) 12096 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12097 MangledName, 's', 128, Fn, ExprLoc); 12098 if (CGM.getTarget().hasFeature("neon")) 12099 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12100 MangledName, 'n', 128, Fn, ExprLoc); 12101 } 12102 } 12103 FD = FD->getPreviousDecl(); 12104 } 12105 } 12106 12107 namespace { 12108 /// Cleanup action for doacross support. 
/// Stores a runtime function and exactly DoacrossFinArgs call arguments, and
/// emits the call when the cleanup is run (provided an insert point exists).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments passed to the stored runtime function.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \p CallArgs must contain exactly DoacrossFinArgs values (asserted); they
  /// are copied into the cleanup.
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emit the stored runtime call; does nothing without an insert point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the call to __kmpc_doacross_init for loop directive \p D and push a
/// cleanup that calls __kmpc_doacross_fini.
///
/// A zero-initialized temporary array of `kmp_dim` records, one per entry of
/// \p NumIterations, is filled with the iteration count as the upper bound
/// and 1 as the stride; the lower bound keeps its zero initialization.
/// Nothing is emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices inside kmp_dim, in declaration order.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The matching __kmpc_doacross_fini call is emitted by a cleanup pushed as
  // NormalAndEHCleanup; its arguments use the directive's end location.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit the runtime call for a doacross `depend` clause \p C.
///
/// The loop data of every loop of the clause is converted to a signed 64-bit
/// integer and stored into a temporary array, whose first element address is
/// passed to __kmpc_doacross_post (for `depend(source)`) or
/// __kmpc_doacross_wait (for `depend(sink)`; any other kind asserts).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee with \p Args under an artificial debug location
/// derived from \p Loc, which must be valid (asserted).
///
/// When the callee is an llvm::Function known not to throw, the call is
/// emitted as a nounwind runtime call; otherwise a regular runtime call is
/// emitted.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to the outlined function \p OutlinedFn; this implementation
/// simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a declare-target region has been emitted when \p D is a
/// function declaration that is a declare target declaration. \p CGF is not
/// used by this implementation.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Return the address of the local variable for \p NativeParam.
/// \p TargetParam is ignored by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the address to use for local variable \p VD, emitting an OpenMP
/// allocator call when the variable requires one.
///
/// Returns an invalid address for a null \p VD. If the variable is registered
/// as a local of an untied task in the current function, its recorded
/// addresses are looked up first. When the canonical declaration carries an
/// `OMPAllocateDeclAttr` and isAllocatableDecl(VD) holds, a call to
/// __kmpc_alloc (or __kmpc_aligned_alloc when the attribute has an alignment)
/// is emitted and a cleanup calling __kmpc_free is pushed. In every other
/// case the untied-task address is returned, which is invalid when the
/// variable is not an untied-task local.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time; round it up to the alignment
      // with emitted arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    // Argument order: thread id, [alignment,] size, allocator. The alignment
    // is only passed to the aligned variant of the runtime function.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For an untied-task local, also store the allocated pointer into the
    // recorded slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      // Emits RTLFn(thread id, allocated pointer as void*, allocator).
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Return true when \p VD is recorded as a local variable of the untied task
/// associated with the function currently being emitted by \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// Push a new set onto the runtime's NontemporalDeclsStack holding every
/// declaration named in the `nontemporal` clauses of \p S.
///
/// Nothing is pushed when \p S has no such clause. Each reference must be
/// either a DeclRefExpr or a MemberExpr on the current object (asserted).
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

/// Pop the set pushed by the constructor, if one was pushed.
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12433 CodeGenFunction &CGF, 12434 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12435 std::pair<Address, Address>> &LocalVars) 12436 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12437 if (!NeedToPush) 12438 return; 12439 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12440 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12441 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12442 } 12443 12444 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12445 if (!NeedToPush) 12446 return; 12447 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12448 } 12449 12450 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12451 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12452 12453 return llvm::any_of( 12454 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12455 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12456 } 12457 12458 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12459 const OMPExecutableDirective &S, 12460 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12461 const { 12462 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12463 // Vars in target/task regions must be excluded completely. 12464 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12465 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12466 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12467 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12468 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12469 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12470 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12471 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12472 } 12473 } 12474 // Exclude vars in private clauses. 
12475 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12476 for (const Expr *Ref : C->varlists()) { 12477 if (!Ref->getType()->isScalarType()) 12478 continue; 12479 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12480 if (!DRE) 12481 continue; 12482 NeedToCheckForLPCs.insert(DRE->getDecl()); 12483 } 12484 } 12485 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12486 for (const Expr *Ref : C->varlists()) { 12487 if (!Ref->getType()->isScalarType()) 12488 continue; 12489 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12490 if (!DRE) 12491 continue; 12492 NeedToCheckForLPCs.insert(DRE->getDecl()); 12493 } 12494 } 12495 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12496 for (const Expr *Ref : C->varlists()) { 12497 if (!Ref->getType()->isScalarType()) 12498 continue; 12499 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12500 if (!DRE) 12501 continue; 12502 NeedToCheckForLPCs.insert(DRE->getDecl()); 12503 } 12504 } 12505 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12506 for (const Expr *Ref : C->varlists()) { 12507 if (!Ref->getType()->isScalarType()) 12508 continue; 12509 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12510 if (!DRE) 12511 continue; 12512 NeedToCheckForLPCs.insert(DRE->getDecl()); 12513 } 12514 } 12515 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12516 for (const Expr *Ref : C->varlists()) { 12517 if (!Ref->getType()->isScalarType()) 12518 continue; 12519 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12520 if (!DRE) 12521 continue; 12522 NeedToCheckForLPCs.insert(DRE->getDecl()); 12523 } 12524 } 12525 for (const Decl *VD : NeedToCheckForLPCs) { 12526 for (const LastprivateConditionalData &Data : 12527 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12528 if (Data.DeclToUniqueName.count(VD) > 0) { 12529 if (!Data.Disabled) 12530 
NeedToAddForLPCsAsDisabled.insert(VD); 12531 break; 12532 } 12533 } 12534 } 12535 } 12536 12537 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12538 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12539 : CGM(CGF.CGM), 12540 Action((CGM.getLangOpts().OpenMP >= 50 && 12541 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12542 [](const OMPLastprivateClause *C) { 12543 return C->getKind() == 12544 OMPC_LASTPRIVATE_conditional; 12545 })) 12546 ? ActionToDo::PushAsLastprivateConditional 12547 : ActionToDo::DoNotPush) { 12548 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12549 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12550 return; 12551 assert(Action == ActionToDo::PushAsLastprivateConditional && 12552 "Expected a push action."); 12553 LastprivateConditionalData &Data = 12554 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12555 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12556 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12557 continue; 12558 12559 for (const Expr *Ref : C->varlists()) { 12560 Data.DeclToUniqueName.insert(std::make_pair( 12561 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12562 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12563 } 12564 } 12565 Data.IVLVal = IVLVal; 12566 Data.Fn = CGF.CurFn; 12567 } 12568 12569 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12570 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12571 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12572 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12573 if (CGM.getLangOpts().OpenMP < 50) 12574 return; 12575 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12576 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12577 if (!NeedToAddForLPCsAsDisabled.empty()) { 12578 Action = ActionToDo::DisableLastprivateConditional; 12579 
LastprivateConditionalData &Data = 12580 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12581 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12582 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12583 Data.Fn = CGF.CurFn; 12584 Data.Disabled = true; 12585 } 12586 } 12587 12588 CGOpenMPRuntime::LastprivateConditionalRAII 12589 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12590 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12591 return LastprivateConditionalRAII(CGF, S); 12592 } 12593 12594 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12595 if (CGM.getLangOpts().OpenMP < 50) 12596 return; 12597 if (Action == ActionToDo::DisableLastprivateConditional) { 12598 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12599 "Expected list of disabled private vars."); 12600 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12601 } 12602 if (Action == ActionToDo::PushAsLastprivateConditional) { 12603 assert( 12604 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12605 "Expected list of lastprivate conditional vars."); 12606 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12607 } 12608 } 12609 12610 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12611 const VarDecl *VD) { 12612 ASTContext &C = CGM.getContext(); 12613 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12614 if (I == LastprivateConditionalToTypes.end()) 12615 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12616 QualType NewType; 12617 const FieldDecl *VDField; 12618 const FieldDecl *FiredField; 12619 LValue BaseLVal; 12620 auto VI = I->getSecond().find(VD); 12621 if (VI == I->getSecond().end()) { 12622 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12623 RD->startDefinition(); 12624 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12625 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 12626 RD->completeDefinition(); 12627 NewType = C.getRecordType(RD); 12628 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12629 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12630 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12631 } else { 12632 NewType = std::get<0>(VI->getSecond()); 12633 VDField = std::get<1>(VI->getSecond()); 12634 FiredField = std::get<2>(VI->getSecond()); 12635 BaseLVal = std::get<3>(VI->getSecond()); 12636 } 12637 LValue FiredLVal = 12638 CGF.EmitLValueForField(BaseLVal, FiredField); 12639 CGF.EmitStoreOfScalar( 12640 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12641 FiredLVal); 12642 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12643 } 12644 12645 namespace { 12646 /// Checks if the lastprivate conditional variable is referenced in LHS. 12647 class LastprivateConditionalRefChecker final 12648 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12649 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12650 const Expr *FoundE = nullptr; 12651 const Decl *FoundD = nullptr; 12652 StringRef UniqueDeclName; 12653 LValue IVLVal; 12654 llvm::Function *FoundFn = nullptr; 12655 SourceLocation Loc; 12656 12657 public: 12658 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12659 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12660 llvm::reverse(LPM)) { 12661 auto It = D.DeclToUniqueName.find(E->getDecl()); 12662 if (It == D.DeclToUniqueName.end()) 12663 continue; 12664 if (D.Disabled) 12665 return false; 12666 FoundE = E; 12667 FoundD = E->getDecl()->getCanonicalDecl(); 12668 UniqueDeclName = It->second; 12669 IVLVal = D.IVLVal; 12670 FoundFn = D.Fn; 12671 break; 12672 } 12673 return FoundE == E; 12674 } 12675 bool VisitMemberExpr(const MemberExpr *E) { 12676 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12677 return false; 12678 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12679 llvm::reverse(LPM)) { 12680 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12681 if (It == D.DeclToUniqueName.end()) 12682 continue; 12683 if (D.Disabled) 12684 return false; 12685 FoundE = E; 12686 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12687 UniqueDeclName = It->second; 12688 IVLVal = D.IVLVal; 12689 FoundFn = D.Fn; 12690 break; 12691 } 12692 return FoundE == E; 12693 } 12694 bool VisitStmt(const Stmt *S) { 12695 for (const Stmt *Child : S->children()) { 12696 if (!Child) 12697 continue; 12698 if (const auto *E = dyn_cast<Expr>(Child)) 12699 if (!E->isGLValue()) 12700 continue; 12701 if (Visit(Child)) 12702 return true; 12703 } 12704 return false; 12705 } 12706 explicit LastprivateConditionalRefChecker( 12707 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12708 : LPM(LPM) {} 12709 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12710 getFoundData() const { 12711 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12712 } 12713 }; 12714 } // namespace 12715 12716 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12717 LValue IVLVal, 12718 StringRef UniqueDeclName, 12719 LValue LVal, 12720 SourceLocation Loc) { 12721 // Last updated loop counter for the lastprivate conditional var. 12722 // int<xx> last_iv = 0; 12723 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12724 llvm::Constant *LastIV = 12725 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12726 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12727 IVLVal.getAlignment().getAsAlign()); 12728 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12729 12730 // Last value of the lastprivate conditional. 
12731 // decltype(priv_a) last_a; 12732 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12733 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12734 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12735 LValue LastLVal = CGF.MakeAddrLValue( 12736 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12737 12738 // Global loop counter. Required to handle inner parallel-for regions. 12739 // iv 12740 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12741 12742 // #pragma omp critical(a) 12743 // if (last_iv <= iv) { 12744 // last_iv = iv; 12745 // last_a = priv_a; 12746 // } 12747 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12748 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12749 Action.Enter(CGF); 12750 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12751 // (last_iv <= iv) ? Check if the variable is updated and store new 12752 // value in global var. 12753 llvm::Value *CmpRes; 12754 if (IVLVal.getType()->isSignedIntegerType()) { 12755 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12756 } else { 12757 assert(IVLVal.getType()->isUnsignedIntegerType() && 12758 "Loop iteration variable must be integer."); 12759 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12760 } 12761 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12762 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12763 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12764 // { 12765 CGF.EmitBlock(ThenBB); 12766 12767 // last_iv = iv; 12768 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12769 12770 // last_a = priv_a; 12771 switch (CGF.getEvaluationKind(LVal.getType())) { 12772 case TEK_Scalar: { 12773 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12774 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12775 break; 12776 } 12777 case TEK_Complex: { 12778 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12779 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12780 break; 12781 } 12782 case TEK_Aggregate: 12783 llvm_unreachable( 12784 "Aggregates are not supported in lastprivate conditional."); 12785 } 12786 // } 12787 CGF.EmitBranch(ExitBB); 12788 // There is no need to emit line number for unconditional branch. 12789 (void)ApplyDebugLocation::CreateEmpty(CGF); 12790 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12791 }; 12792 12793 if (CGM.getLangOpts().OpenMPSimd) { 12794 // Do not emit as a critical region as no parallel region could be emitted. 12795 RegionCodeGenTy ThenRCG(CodeGen); 12796 ThenRCG(CGF); 12797 } else { 12798 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12799 } 12800 } 12801 12802 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12803 const Expr *LHS) { 12804 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12805 return; 12806 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12807 if (!Checker.Visit(LHS)) 12808 return; 12809 const Expr *FoundE; 12810 const Decl *FoundD; 12811 StringRef UniqueDeclName; 12812 LValue IVLVal; 12813 llvm::Function *FoundFn; 12814 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12815 Checker.getFoundData(); 12816 if (FoundFn != CGF.CurFn) { 12817 // Special codegen for inner parallel regions. 
12818 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12819 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12820 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12821 "Lastprivate conditional is not found in outer region."); 12822 QualType StructTy = std::get<0>(It->getSecond()); 12823 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12824 LValue PrivLVal = CGF.EmitLValue(FoundE); 12825 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12826 PrivLVal.getAddress(CGF), 12827 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12828 CGF.ConvertTypeForMem(StructTy)); 12829 LValue BaseLVal = 12830 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12831 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12832 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12833 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12834 FiredLVal, llvm::AtomicOrdering::Unordered, 12835 /*IsVolatile=*/true, /*isInit=*/false); 12836 return; 12837 } 12838 12839 // Private address of the lastprivate conditional in the current context. 
12840 // priv_a 12841 LValue LVal = CGF.EmitLValue(FoundE); 12842 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12843 FoundE->getExprLoc()); 12844 } 12845 12846 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12847 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12848 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12849 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12850 return; 12851 auto Range = llvm::reverse(LastprivateConditionalStack); 12852 auto It = llvm::find_if( 12853 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12854 if (It == Range.end() || It->Fn != CGF.CurFn) 12855 return; 12856 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12857 assert(LPCI != LastprivateConditionalToTypes.end() && 12858 "Lastprivates must be registered already."); 12859 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12860 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12861 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12862 for (const auto &Pair : It->DeclToUniqueName) { 12863 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12864 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12865 continue; 12866 auto I = LPCI->getSecond().find(Pair.first); 12867 assert(I != LPCI->getSecond().end() && 12868 "Lastprivate must be rehistered already."); 12869 // bool Cmp = priv_a.Fired != 0; 12870 LValue BaseLVal = std::get<3>(I->getSecond()); 12871 LValue FiredLVal = 12872 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12873 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12874 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12875 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12876 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12877 // if (Cmp) { 12878 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12879 CGF.EmitBlock(ThenBB); 
12880 Address Addr = CGF.GetAddrOfLocalVar(VD); 12881 LValue LVal; 12882 if (VD->getType()->isReferenceType()) 12883 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12884 AlignmentSource::Decl); 12885 else 12886 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12887 AlignmentSource::Decl); 12888 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12889 D.getBeginLoc()); 12890 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12891 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12892 // } 12893 } 12894 } 12895 12896 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12897 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12898 SourceLocation Loc) { 12899 if (CGF.getLangOpts().OpenMP < 50) 12900 return; 12901 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12902 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12903 "Unknown lastprivate conditional variable."); 12904 StringRef UniqueName = It->second; 12905 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12906 // The variable was not updated in the region - exit. 
12907 if (!GV) 12908 return; 12909 LValue LPLVal = CGF.MakeAddrLValue( 12910 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12911 PrivLVal.getType().getNonReferenceType()); 12912 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12913 CGF.EmitStoreOfScalar(Res, PrivLVal); 12914 } 12915 12916 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12917 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12918 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12919 llvm_unreachable("Not supported in SIMD-only mode"); 12920 } 12921 12922 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12923 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12924 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12925 llvm_unreachable("Not supported in SIMD-only mode"); 12926 } 12927 12928 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12929 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12930 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12931 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12932 bool Tied, unsigned &NumberOfParts) { 12933 llvm_unreachable("Not supported in SIMD-only mode"); 12934 } 12935 12936 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12937 SourceLocation Loc, 12938 llvm::Function *OutlinedFn, 12939 ArrayRef<llvm::Value *> CapturedVars, 12940 const Expr *IfCond, 12941 llvm::Value *NumThreads) { 12942 llvm_unreachable("Not supported in SIMD-only mode"); 12943 } 12944 12945 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12946 CodeGenFunction &CGF, StringRef CriticalName, 12947 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12948 const Expr *Hint) { 12949 llvm_unreachable("Not supported in SIMD-only mode"); 12950 } 12951 12952 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12953 const RegionCodeGenTy &MasterOpGen, 12954 SourceLocation Loc) { 12955 
llvm_unreachable("Not supported in SIMD-only mode"); 12956 } 12957 12958 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12959 const RegionCodeGenTy &MasterOpGen, 12960 SourceLocation Loc, 12961 const Expr *Filter) { 12962 llvm_unreachable("Not supported in SIMD-only mode"); 12963 } 12964 12965 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12966 SourceLocation Loc) { 12967 llvm_unreachable("Not supported in SIMD-only mode"); 12968 } 12969 12970 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12971 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12972 SourceLocation Loc) { 12973 llvm_unreachable("Not supported in SIMD-only mode"); 12974 } 12975 12976 void CGOpenMPSIMDRuntime::emitSingleRegion( 12977 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12978 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12979 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12980 ArrayRef<const Expr *> AssignmentOps) { 12981 llvm_unreachable("Not supported in SIMD-only mode"); 12982 } 12983 12984 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12985 const RegionCodeGenTy &OrderedOpGen, 12986 SourceLocation Loc, 12987 bool IsThreads) { 12988 llvm_unreachable("Not supported in SIMD-only mode"); 12989 } 12990 12991 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12992 SourceLocation Loc, 12993 OpenMPDirectiveKind Kind, 12994 bool EmitChecks, 12995 bool ForceSimpleCall) { 12996 llvm_unreachable("Not supported in SIMD-only mode"); 12997 } 12998 12999 void CGOpenMPSIMDRuntime::emitForDispatchInit( 13000 CodeGenFunction &CGF, SourceLocation Loc, 13001 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 13002 bool Ordered, const DispatchRTInput &DispatchValues) { 13003 llvm_unreachable("Not supported in SIMD-only mode"); 13004 } 13005 13006 void CGOpenMPSIMDRuntime::emitForStaticInit( 13007 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 13008 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 13009 llvm_unreachable("Not supported in SIMD-only mode"); 13010 } 13011 13012 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 13013 CodeGenFunction &CGF, SourceLocation Loc, 13014 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 13015 llvm_unreachable("Not supported in SIMD-only mode"); 13016 } 13017 13018 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 13019 SourceLocation Loc, 13020 unsigned IVSize, 13021 bool IVSigned) { 13022 llvm_unreachable("Not supported in SIMD-only mode"); 13023 } 13024 13025 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 13026 SourceLocation Loc, 13027 OpenMPDirectiveKind DKind) { 13028 llvm_unreachable("Not supported in SIMD-only mode"); 13029 } 13030 13031 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 13032 SourceLocation Loc, 13033 unsigned IVSize, bool IVSigned, 13034 Address IL, Address LB, 13035 Address UB, Address ST) { 13036 llvm_unreachable("Not supported in SIMD-only mode"); 13037 } 13038 13039 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 13040 llvm::Value *NumThreads, 13041 SourceLocation Loc) { 13042 llvm_unreachable("Not supported in SIMD-only mode"); 13043 } 13044 13045 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 13046 ProcBindKind ProcBind, 13047 SourceLocation Loc) { 13048 llvm_unreachable("Not supported in SIMD-only mode"); 13049 } 13050 13051 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13052 const VarDecl *VD, 13053 Address VDAddr, 13054 SourceLocation Loc) { 13055 llvm_unreachable("Not supported in SIMD-only mode"); 13056 } 13057 13058 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13059 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13060 CodeGenFunction *CGF) { 13061 llvm_unreachable("Not supported in SIMD-only mode"); 
13062 } 13063 13064 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13065 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13066 llvm_unreachable("Not supported in SIMD-only mode"); 13067 } 13068 13069 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13070 ArrayRef<const Expr *> Vars, 13071 SourceLocation Loc, 13072 llvm::AtomicOrdering AO) { 13073 llvm_unreachable("Not supported in SIMD-only mode"); 13074 } 13075 13076 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13077 const OMPExecutableDirective &D, 13078 llvm::Function *TaskFunction, 13079 QualType SharedsTy, Address Shareds, 13080 const Expr *IfCond, 13081 const OMPTaskDataTy &Data) { 13082 llvm_unreachable("Not supported in SIMD-only mode"); 13083 } 13084 13085 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13086 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13087 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13088 const Expr *IfCond, const OMPTaskDataTy &Data) { 13089 llvm_unreachable("Not supported in SIMD-only mode"); 13090 } 13091 13092 void CGOpenMPSIMDRuntime::emitReduction( 13093 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13094 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13095 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13096 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13097 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13098 ReductionOps, Options); 13099 } 13100 13101 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13102 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13103 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13104 llvm_unreachable("Not supported in SIMD-only mode"); 13105 } 13106 13107 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13108 SourceLocation Loc, 13109 bool 
IsWorksharingReduction) { 13110 llvm_unreachable("Not supported in SIMD-only mode"); 13111 } 13112 13113 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13114 SourceLocation Loc, 13115 ReductionCodeGen &RCG, 13116 unsigned N) { 13117 llvm_unreachable("Not supported in SIMD-only mode"); 13118 } 13119 13120 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13121 SourceLocation Loc, 13122 llvm::Value *ReductionsPtr, 13123 LValue SharedLVal) { 13124 llvm_unreachable("Not supported in SIMD-only mode"); 13125 } 13126 13127 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13128 SourceLocation Loc, 13129 const OMPTaskDataTy &Data) { 13130 llvm_unreachable("Not supported in SIMD-only mode"); 13131 } 13132 13133 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13134 CodeGenFunction &CGF, SourceLocation Loc, 13135 OpenMPDirectiveKind CancelRegion) { 13136 llvm_unreachable("Not supported in SIMD-only mode"); 13137 } 13138 13139 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13140 SourceLocation Loc, const Expr *IfCond, 13141 OpenMPDirectiveKind CancelRegion) { 13142 llvm_unreachable("Not supported in SIMD-only mode"); 13143 } 13144 13145 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13146 const OMPExecutableDirective &D, StringRef ParentName, 13147 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13148 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13149 llvm_unreachable("Not supported in SIMD-only mode"); 13150 } 13151 13152 void CGOpenMPSIMDRuntime::emitTargetCall( 13153 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13154 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13155 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13156 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13157 const OMPLoopDirective &D)> 13158 SizeEmitter) { 13159 llvm_unreachable("Not supported in SIMD-only mode"); 13160 } 13161 
13162 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 13163 llvm_unreachable("Not supported in SIMD-only mode"); 13164 } 13165 13166 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 13167 llvm_unreachable("Not supported in SIMD-only mode"); 13168 } 13169 13170 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 13171 return false; 13172 } 13173 13174 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 13175 const OMPExecutableDirective &D, 13176 SourceLocation Loc, 13177 llvm::Function *OutlinedFn, 13178 ArrayRef<llvm::Value *> CapturedVars) { 13179 llvm_unreachable("Not supported in SIMD-only mode"); 13180 } 13181 13182 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 13183 const Expr *NumTeams, 13184 const Expr *ThreadLimit, 13185 SourceLocation Loc) { 13186 llvm_unreachable("Not supported in SIMD-only mode"); 13187 } 13188 13189 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 13190 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13191 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 13192 llvm_unreachable("Not supported in SIMD-only mode"); 13193 } 13194 13195 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 13196 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 13197 const Expr *Device) { 13198 llvm_unreachable("Not supported in SIMD-only mode"); 13199 } 13200 13201 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 13202 const OMPLoopDirective &D, 13203 ArrayRef<Expr *> NumIterations) { 13204 llvm_unreachable("Not supported in SIMD-only mode"); 13205 } 13206 13207 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 13208 const OMPDependClause *C) { 13209 llvm_unreachable("Not supported in SIMD-only mode"); 13210 } 13211 13212 const VarDecl * 13213 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 13214 const VarDecl *NativeParam) const { 13215 llvm_unreachable("Not 
supported in SIMD-only mode"); 13216 } 13217 13218 Address 13219 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 13220 const VarDecl *NativeParam, 13221 const VarDecl *TargetParam) const { 13222 llvm_unreachable("Not supported in SIMD-only mode"); 13223 } 13224