//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Subclasses provide the thread-id variable, the helper-function name and
/// (for tasks) untied-switch emission for each kind of region below.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that capture a statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without an associated captured statement
  /// (used by inlined regions, which delegate to an outer region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the task-switching point for untied tasks; no-op by default.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch switch for untied tasks: each
  /// task part stores its part id, jumps to the function exit, and resumes
  /// at the matching switch case on re-entry.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Store the next part id, run the untied codegen callback, then exit the
    /// function; a new switch case resumes execution past this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Note: local variable intentionally shadows the member; getOldCSI() may
    // return a non-CGOpenMPRegionInfo captured-stmt info, which still
    // provides a helper name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda capture fields and block info are
  /// saved and cleared for the duration of the region (restored in the
  /// destructor).
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  // Run the post-action on normal and EH cleanup paths, but only when there
  // is a valid insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the stored codegen callback inside a cleanups scope; if a pre/post
/// action is attached, its Exit hook is registered as a cleanup so it fires
/// on both normal and exceptional exits.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initializer of a reduction item: either by invoking the
/// user-defined reduction initializer (if \p DRD has one), or by copying a
/// null-constant default value into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Map the UDR initializer's omp_priv/omp_orig placeholders to the
    // private/original addresses, then emit the initializer call.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a zero-initialized global of the item
    // type and copy it into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element through
/// the declare-reduction initializer instead of \p Init.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 if (DRD) 691 SrcAddr = 692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 693 694 llvm::Value *SrcBegin = nullptr; 695 if (DRD) 696 SrcBegin = SrcAddr.getPointer(); 697 llvm::Value *DestBegin = DestAddr.getPointer(); 698 // Cast from pointer to array type to pointer to single element. 699 llvm::Value *DestEnd = 700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, SrcAddr.getElementType(), 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, DestAddr.getElementType(), 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 746 "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 753 "omp.arraycpy.dest.element"); 754 // Check whether we've reached the end. 
  // Check whether the copy loop has reached the end of the destination array;
  // keep the PHI fed from the block the branch was emitted in.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared (original) copy of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit an lvalue for the upper bound of a reduction item. Only array
/// sections have a distinct upper bound (emitted with
/// /*IsLowerBound=*/false); any other expression yields an empty LValue.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit initialization of the private copy of an aggregate reduction item.
/// If a user-defined reduction with an initializer applies (or the private
/// variable has no init of its own), the declare-reduction initializer is
/// used; otherwise the private variable's own initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

/// Collect the per-clause data for all reduction items. The four lists are
/// parallel (same length) and are walked in lockstep.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit the shared and original lvalues for reduction item N. Items must be
/// processed in order: entry N is appended only after entries 0..N-1 exist.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // Shared expression doubles as the original reference; reuse the lvalues.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

/// Compute and record the size (in chars and in elements) of reduction item
/// N. For variably modified types the size is computed at runtime and the
/// VLA size expression is bound so the type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: only the byte size is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, then scale by the element size.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole object: take the byte size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably modified private type of item N using a previously
/// computed \p Size. No-op (with sanity assert) for constant-size items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item N, dispatching
/// between aggregate init, declare-reduction init, and the variable's own
/// initializer. \p DefaultInit is invoked first and may fully handle init.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item N requires a
/// destructor call.
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of reduction item N, if
/// its type needs destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointers/references in \p BaseLV until its type matches
/// \p ElTy, then return an lvalue of that element type (with TBAA info
/// preserved for the subobject).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild a chain of temporaries mirroring the pointer/reference nesting of
/// \p BaseTy so that \p Addr can be accessed through the original base's
/// shape; if no indirection is involved, just cast \p Addr to the base's
/// pointer type.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per level of indirection; each outer temporary stores
    // the address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted address in the innermost temporary and hand back
    // the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

/// Find the base VarDecl of an array section/subscript expression, setting
/// \p DE to the underlying DeclRefExpr. Returns nullptr (leaving \p DE
/// untouched) when \p Ref is neither form — callers must check the return
/// value before using \p DE.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust \p PrivateAddr for a reduction item that refers to an array
/// section/subscript: offset the private buffer by the distance between the
/// section start and the underlying array base, then cast it back to the
/// base's shape. Plain variables are returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item N is initialized by a user-defined
/// declare-reduction initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Load the thread id lvalue through the pointer-typed thread id parameter
/// of the outlined region.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope is pushed so exceptions cannot escape the region.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// For task regions the thread id variable is a local value (not a pointer
/// parameter), so build a plain address lvalue for it.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-bitfield, non-mutable field of type \p FieldTy to
/// the record \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 i32s (see kmp.h).
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase globals that are unused declarations.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime entity name: the first part is prefixed with
/// FirstSeparator, subsequent parts with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the combiner (or initializer, when \p IsCombiner is false) function
/// for a user-defined reduction: an internal-linkage function taking
/// restrict-qualified in/out pointers, with \p In / \p Out privatized to the
/// pointees so the declare-reduction expression can be emitted directly.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer function: run omp_priv's own non-trivial initializer first.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once per decl) the combiner and optional initializer functions for
/// a user-defined reduction and record them in UDRMap; when emitted inside a
/// function, also remember the decl for per-function cleanup.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer expression is emitted inline; a direct
    // init is handled through omp_priv's own initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) pair for \p D, emitting it on demand.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the body of a 'parallel' or 'teams' region into a function whose
/// first parameter is the kmp_int32* thread id. Cancellation support is
/// derived from the concrete directive kind.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
    RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each
  // part finishes so the runtime can schedule the next part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Create the service insert point for the current function: a dummy
/// bitcast instruction used as an anchor for runtime setup calls, placed
/// either at the current insertion point or right after the allocas.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insert point anchor for the current function, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Build the ";file;function;line;column;;" ident string for \p Loc into
/// \p Buffer and return it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Emit (or reuse) the ident_t* location descriptor for \p Loc. Without
/// debug info (or with an invalid location) the default source-location
/// string is used.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

/// Get the global thread id for the current function, either from the
/// region's thread id parameter or via __kmpc_global_thread_num (cached per
/// function).
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when it is safe w.r.t. exception handling:
      // no landing pads required, or the load happens in/near the entry
      // block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function cached state (thread id, UDRs, UDMs,
/// lastprivate-conditional and untied-task info) for the finished function.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

/// Return the ident_t* type used for location arguments.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Return (lazily building) the pointer type of the kmpc_micro outlined
/// function: void (*)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...).
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Declare the __kmpc_{for,distribute}_static_init_{4,4u,8,8u} runtime entry
/// matching the induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
      CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry without user-provided #line directives before diagnosing.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the reference pointer created for a 'declare
/// target link' (or 'to' under unified shared memory) variable; invalid
/// address otherwise (including simd-only mode).
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables across TUs with the file ID.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return (creating on first use) the per-variable cache used by
/// __kmpc_threadprivate_cached, named "<mangled-var><sep>cache<sep>".
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the thread-local copy of a threadprivate variable.
/// With native TLS support the original address is already thread-local;
/// otherwise call __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit (once per mangled name) the ctor/dtor helper functions for a
/// threadprivate variable definition and register them with the runtime.
/// When called outside a function (\p CGF == nullptr), wraps the
/// registration in a fresh "__omp_threadprivate_init_" global-init function
/// and returns it; otherwise emits the registration inline and returns null.
/// Returns null too when TLS is used (nothing to register).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The helper receives the per-thread copy's address as a single void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed as typed null function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a dedicated init function that the
      // caller can schedule as a global initializer.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit initialization/finalization support for a 'declare target' variable
/// definition: ctor/dtor helper functions registered as offload entries.
/// Returns true when compiling for a device (the caller then skips the
/// normal host emission path).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no device compilation is involved.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed via
  // the reference pointer instead; no ctor/dtor entries are needed here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries for each definition only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // The initializer is emitted through a generic (AS 0) view of the
      // variable's address.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // On the host only a placeholder (used as the entry's ID) is needed.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. Uses a TLS global when available,
/// otherwise a __kmpc_threadprivate_cached call with a dedicated cache.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else over \p Cond, running \p ThenGen or \p ElseGen. When the
/// condition constant-folds, only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit a call to an outlined 'parallel' region: __kmpc_fork_call when the
/// (optional) if-clause is true, or a serialized execution path
/// (__kmpc_serialized_parallel / direct call / __kmpc_end_serialized_parallel)
/// when it is false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return (creating on first use) a zero-initialized common-linkage global
/// with the given name/type, memoized in InternalVars. Asserts the type
/// matches if the variable already exists.
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the "gomp_critical_user_<name>.var" lock variable backing a named
/// 'critical' region.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return
      getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits an "enter" runtime call before the region and an "exit" call after
/// it; with \p Conditional the region body is guarded on the enter call's
/// nonzero result (pattern: if (__kmpc_X(...)) { body; __kmpc_end_X(...); }).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Must be called by users of a Conditional action after the region body to
  // close the guard opened in Enter().
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region guarded by its named lock, optionally passing a
/// 'hint' clause value to __kmpc_critical_with_hint.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: body runs only when __kmpc_master returns nonzero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'masked' region; the filter thread id defaults to 0 (the primary
/// thread) when no 'filter' clause is present.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield' call (via OMPIRBuilder when enabled), then the untied
/// task switch point if we are inside an OpenMP region.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the generic void* slot as a pointer to the variable's own type.
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

/// Emit the "void copy_func(void *LHSArg, void *RHSArg)" helper used by
/// __kmpc_copyprivate: both arguments point to void*[n] arrays, and each
/// destination element is assigned from the matching source element using
/// the provided assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, plus the copyprivate broadcast
/// (__kmpc_copyprivate) when the directive carries copyprivate clauses.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded 'single' block)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region; when \p IsThreads, bracket the body with
/// __kmpc_ordered / __kmpc_end_ordered.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
OMPRTL___kmpc_ordered), 2492 Args, 2493 OMPBuilder.getOrCreateRuntimeFunction( 2494 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2495 Args); 2496 OrderedOpGen.setAction(Action); 2497 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2498 return; 2499 } 2500 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2501 } 2502 2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2504 unsigned Flags; 2505 if (Kind == OMPD_for) 2506 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2507 else if (Kind == OMPD_sections) 2508 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2509 else if (Kind == OMPD_single) 2510 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2511 else if (Kind == OMPD_barrier) 2512 Flags = OMP_IDENT_BARRIER_EXPL; 2513 else 2514 Flags = OMP_IDENT_BARRIER_IMPL; 2515 return Flags; 2516 } 2517 2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2519 CodeGenFunction &CGF, const OMPLoopDirective &S, 2520 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2521 // Check if the loop directive is actually a doacross loop directive. In this 2522 // case choose static, 1 schedule. 2523 if (llvm::any_of( 2524 S.getClausesOfKind<OMPOrderedClause>(), 2525 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2526 ScheduleKind = OMPC_SCHEDULE_static; 2527 // Chunk size is 1 in this case. 
2528 llvm::APInt ChunkSize(32, 1); 2529 ChunkExpr = IntegerLiteral::Create( 2530 CGF.getContext(), ChunkSize, 2531 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2532 SourceLocation()); 2533 } 2534 } 2535 2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2537 OpenMPDirectiveKind Kind, bool EmitChecks, 2538 bool ForceSimpleCall) { 2539 // Check if we should use the OMPBuilder 2540 auto *OMPRegionInfo = 2541 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2542 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2543 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2544 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2545 return; 2546 } 2547 2548 if (!CGF.HaveInsertPoint()) 2549 return; 2550 // Build call __kmpc_cancel_barrier(loc, thread_id); 2551 // Build call __kmpc_barrier(loc, thread_id); 2552 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2553 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2554 // thread_id); 2555 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2556 getThreadID(CGF, Loc)}; 2557 if (OMPRegionInfo) { 2558 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2559 llvm::Value *Result = CGF.EmitRuntimeCall( 2560 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2561 OMPRTL___kmpc_cancel_barrier), 2562 Args); 2563 if (EmitChecks) { 2564 // if (__kmpc_cancel_barrier()) { 2565 // exit from construct; 2566 // } 2567 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2568 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2569 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2570 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2571 CGF.EmitBlock(ExitBB); 2572 // exit from construct; 2573 CodeGenFunction::JumpDest CancelDestination = 2574 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2575 CGF.EmitBranchThroughCleanup(CancelDestination); 2576 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2577 } 2578 
return; 2579 } 2580 } 2581 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2582 CGM.getModule(), OMPRTL___kmpc_barrier), 2583 Args); 2584 } 2585 2586 /// Map the OpenMP loop schedule to the runtime enumeration. 2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2588 bool Chunked, bool Ordered) { 2589 switch (ScheduleKind) { 2590 case OMPC_SCHEDULE_static: 2591 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2592 : (Ordered ? OMP_ord_static : OMP_sch_static); 2593 case OMPC_SCHEDULE_dynamic: 2594 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2595 case OMPC_SCHEDULE_guided: 2596 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2597 case OMPC_SCHEDULE_runtime: 2598 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2599 case OMPC_SCHEDULE_auto: 2600 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2601 case OMPC_SCHEDULE_unknown: 2602 assert(!Chunked && "chunk was specified but schedule kind not known"); 2603 return Ordered ? OMP_ord_static : OMP_sch_static; 2604 } 2605 llvm_unreachable("Unexpected runtime schedule"); 2606 } 2607 2608 /// Map the OpenMP distribute schedule to the runtime enumeration. 2609 static OpenMPSchedType 2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2611 // only static is allowed for dist_schedule 2612 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2613 } 2614 2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2616 bool Chunked) const { 2617 OpenMPSchedType Schedule = 2618 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2619 return Schedule == OMP_sch_static; 2620 } 2621 2622 bool CGOpenMPRuntime::isStaticNonchunked( 2623 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2624 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2625 return Schedule == OMP_dist_sch_static; 2626 } 2627 2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2629 bool Chunked) const { 2630 OpenMPSchedType Schedule = 2631 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2632 return Schedule == OMP_sch_static_chunked; 2633 } 2634 2635 bool CGOpenMPRuntime::isStaticChunked( 2636 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2637 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2638 return Schedule == OMP_dist_sch_static_chunked; 2639 } 2640 2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2642 OpenMPSchedType Schedule = 2643 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2644 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2645 return Schedule != OMP_sch_static; 2646 } 2647 2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2649 OpenMPScheduleClauseModifier M1, 2650 OpenMPScheduleClauseModifier M2) { 2651 int Modifier = 0; 2652 switch (M1) { 2653 case OMPC_SCHEDULE_MODIFIER_monotonic: 2654 Modifier = OMP_sch_modifier_monotonic; 2655 break; 2656 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2657 Modifier = OMP_sch_modifier_nonmonotonic; 2658 break; 2659 case OMPC_SCHEDULE_MODIFIER_simd: 2660 if (Schedule == OMP_sch_static_chunked) 2661 Schedule = OMP_sch_static_balanced_chunked; 2662 break; 2663 case 
OMPC_SCHEDULE_MODIFIER_last: 2664 case OMPC_SCHEDULE_MODIFIER_unknown: 2665 break; 2666 } 2667 switch (M2) { 2668 case OMPC_SCHEDULE_MODIFIER_monotonic: 2669 Modifier = OMP_sch_modifier_monotonic; 2670 break; 2671 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2672 Modifier = OMP_sch_modifier_nonmonotonic; 2673 break; 2674 case OMPC_SCHEDULE_MODIFIER_simd: 2675 if (Schedule == OMP_sch_static_chunked) 2676 Schedule = OMP_sch_static_balanced_chunked; 2677 break; 2678 case OMPC_SCHEDULE_MODIFIER_last: 2679 case OMPC_SCHEDULE_MODIFIER_unknown: 2680 break; 2681 } 2682 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2683 // If the static schedule kind is specified or if the ordered clause is 2684 // specified, and if the nonmonotonic modifier is not specified, the effect is 2685 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2686 // modifier is specified, the effect is as if the nonmonotonic modifier is 2687 // specified. 2688 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2689 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2690 Schedule == OMP_sch_static_balanced_chunked || 2691 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2692 Schedule == OMP_dist_sch_static_chunked || 2693 Schedule == OMP_dist_sch_static)) 2694 Modifier = OMP_sch_modifier_nonmonotonic; 2695 } 2696 return Schedule | Modifier; 2697 } 2698 2699 void CGOpenMPRuntime::emitForDispatchInit( 2700 CodeGenFunction &CGF, SourceLocation Loc, 2701 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2702 bool Ordered, const DispatchRTInput &DispatchValues) { 2703 if (!CGF.HaveInsertPoint()) 2704 return; 2705 OpenMPSchedType Schedule = getRuntimeSchedule( 2706 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2707 assert(Ordered || 2708 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2709 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2710 
Schedule != OMP_sch_static_balanced_chunked)); 2711 // Call __kmpc_dispatch_init( 2712 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2713 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2714 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2715 2716 // If the Chunk was not specified in the clause - use default value 1. 2717 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2718 : CGF.Builder.getIntN(IVSize, 1); 2719 llvm::Value *Args[] = { 2720 emitUpdateLocation(CGF, Loc), 2721 getThreadID(CGF, Loc), 2722 CGF.Builder.getInt32(addMonoNonMonoModifier( 2723 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2724 DispatchValues.LB, // Lower 2725 DispatchValues.UB, // Upper 2726 CGF.Builder.getIntN(IVSize, 1), // Stride 2727 Chunk // Chunk 2728 }; 2729 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2730 } 2731 2732 static void emitForStaticInitCall( 2733 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2734 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2735 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2736 const CGOpenMPRuntime::StaticRTInput &Values) { 2737 if (!CGF.HaveInsertPoint()) 2738 return; 2739 2740 assert(!Values.Ordered); 2741 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2742 Schedule == OMP_sch_static_balanced_chunked || 2743 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2744 Schedule == OMP_dist_sch_static || 2745 Schedule == OMP_dist_sch_static_chunked); 2746 2747 // Call __kmpc_for_static_init( 2748 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2749 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2750 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2751 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2752 llvm::Value *Chunk = Values.Chunk; 2753 if (Chunk == nullptr) { 2754 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2755 Schedule == 
OMP_dist_sch_static) && 2756 "expected static non-chunked schedule"); 2757 // If the Chunk was not specified in the clause - use default value 1. 2758 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2759 } else { 2760 assert((Schedule == OMP_sch_static_chunked || 2761 Schedule == OMP_sch_static_balanced_chunked || 2762 Schedule == OMP_ord_static_chunked || 2763 Schedule == OMP_dist_sch_static_chunked) && 2764 "expected static chunked schedule"); 2765 } 2766 llvm::Value *Args[] = { 2767 UpdateLocation, 2768 ThreadId, 2769 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2770 M2)), // Schedule type 2771 Values.IL.getPointer(), // &isLastIter 2772 Values.LB.getPointer(), // &LB 2773 Values.UB.getPointer(), // &UB 2774 Values.ST.getPointer(), // &Stride 2775 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2776 Chunk // Chunk 2777 }; 2778 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2779 } 2780 2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2782 SourceLocation Loc, 2783 OpenMPDirectiveKind DKind, 2784 const OpenMPScheduleTy &ScheduleKind, 2785 const StaticRTInput &Values) { 2786 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2787 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2788 assert(isOpenMPWorksharingDirective(DKind) && 2789 "Expected loop-based or sections-based directive."); 2790 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2791 isOpenMPLoopDirective(DKind) 2792 ? 
OMP_IDENT_WORK_LOOP 2793 : OMP_IDENT_WORK_SECTIONS); 2794 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2795 llvm::FunctionCallee StaticInitFunction = 2796 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2797 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2798 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2799 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2800 } 2801 2802 void CGOpenMPRuntime::emitDistributeStaticInit( 2803 CodeGenFunction &CGF, SourceLocation Loc, 2804 OpenMPDistScheduleClauseKind SchedKind, 2805 const CGOpenMPRuntime::StaticRTInput &Values) { 2806 OpenMPSchedType ScheduleNum = 2807 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2808 llvm::Value *UpdatedLocation = 2809 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2810 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2811 llvm::FunctionCallee StaticInitFunction; 2812 bool isGPUDistribute = 2813 CGM.getLangOpts().OpenMPIsDevice && 2814 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2815 StaticInitFunction = createForStaticInitFunction( 2816 Values.IVSize, Values.IVSigned, isGPUDistribute); 2817 2818 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2819 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2820 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2821 } 2822 2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2824 SourceLocation Loc, 2825 OpenMPDirectiveKind DKind) { 2826 if (!CGF.HaveInsertPoint()) 2827 return; 2828 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2829 llvm::Value *Args[] = { 2830 emitUpdateLocation(CGF, Loc, 2831 isOpenMPDistributeDirective(DKind) 2832 ? OMP_IDENT_WORK_DISTRIBUTE 2833 : isOpenMPLoopDirective(DKind) 2834 ? 
OMP_IDENT_WORK_LOOP 2835 : OMP_IDENT_WORK_SECTIONS), 2836 getThreadID(CGF, Loc)}; 2837 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2838 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2839 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2840 CGF.EmitRuntimeCall( 2841 OMPBuilder.getOrCreateRuntimeFunction( 2842 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2843 Args); 2844 else 2845 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2846 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2847 Args); 2848 } 2849 2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2851 SourceLocation Loc, 2852 unsigned IVSize, 2853 bool IVSigned) { 2854 if (!CGF.HaveInsertPoint()) 2855 return; 2856 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2857 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2858 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2859 } 2860 2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2862 SourceLocation Loc, unsigned IVSize, 2863 bool IVSigned, Address IL, 2864 Address LB, Address UB, 2865 Address ST) { 2866 // Call __kmpc_dispatch_next( 2867 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2868 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2869 // kmp_int[32|64] *p_stride); 2870 llvm::Value *Args[] = { 2871 emitUpdateLocation(CGF, Loc), 2872 getThreadID(CGF, Loc), 2873 IL.getPointer(), // &isLastIter 2874 LB.getPointer(), // &Lower 2875 UB.getPointer(), // &Upper 2876 ST.getPointer() // &Stride 2877 }; 2878 llvm::Value *Call = 2879 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2880 return CGF.EmitScalarConversion( 2881 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2882 CGF.getContext().BoolTy, Loc); 2883 } 2884 2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2886 llvm::Value 
*NumThreads, 2887 SourceLocation Loc) { 2888 if (!CGF.HaveInsertPoint()) 2889 return; 2890 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2891 llvm::Value *Args[] = { 2892 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2893 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2894 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2895 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2896 Args); 2897 } 2898 2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2900 ProcBindKind ProcBind, 2901 SourceLocation Loc) { 2902 if (!CGF.HaveInsertPoint()) 2903 return; 2904 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2905 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2906 llvm::Value *Args[] = { 2907 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2908 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2910 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2911 Args); 2912 } 2913 2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2915 SourceLocation Loc, llvm::AtomicOrdering AO) { 2916 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2917 OMPBuilder.createFlush(CGF.Builder); 2918 } else { 2919 if (!CGF.HaveInsertPoint()) 2920 return; 2921 // Build call void __kmpc_flush(ident_t *loc) 2922 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2923 CGM.getModule(), OMPRTL___kmpc_flush), 2924 emitUpdateLocation(CGF, Loc)); 2925 } 2926 } 2927 2928 namespace { 2929 /// Indexes of fields for type kmp_task_t. 2930 enum KmpTaskTFields { 2931 /// List of shared variables. 2932 KmpTaskTShareds, 2933 /// Task routine. 2934 KmpTaskTRoutine, 2935 /// Partition id for the untied tasks. 2936 KmpTaskTPartId, 2937 /// Function with call of destructors for private variables. 2938 Data1, 2939 /// Task priority. 
2940 Data2, 2941 /// (Taskloops only) Lower bound. 2942 KmpTaskTLowerBound, 2943 /// (Taskloops only) Upper bound. 2944 KmpTaskTUpperBound, 2945 /// (Taskloops only) Stride. 2946 KmpTaskTStride, 2947 /// (Taskloops only) Is last iteration flag. 2948 KmpTaskTLastIter, 2949 /// (Taskloops only) Reduction data. 2950 KmpTaskTReductions, 2951 }; 2952 } // anonymous namespace 2953 2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2955 return OffloadEntriesTargetRegion.empty() && 2956 OffloadEntriesDeviceGlobalVar.empty(); 2957 } 2958 2959 /// Initialize target region entry. 2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2961 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2962 StringRef ParentName, unsigned LineNum, 2963 unsigned Order) { 2964 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2965 "only required for the device " 2966 "code generation."); 2967 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2968 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2969 OMPTargetRegionEntryTargetRegion); 2970 ++OffloadingEntriesNum; 2971 } 2972 2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2974 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2975 StringRef ParentName, unsigned LineNum, 2976 llvm::Constant *Addr, llvm::Constant *ID, 2977 OMPTargetRegionEntryKind Flags) { 2978 // If we are emitting code for a target, the entry is already initialized, 2979 // only has to be registered. 2980 if (CGM.getLangOpts().OpenMPIsDevice) { 2981 // This could happen if the device compilation is invoked standalone. 
2982 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2983 return; 2984 auto &Entry = 2985 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2986 Entry.setAddress(Addr); 2987 Entry.setID(ID); 2988 Entry.setFlags(Flags); 2989 } else { 2990 if (Flags == 2991 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2992 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2993 /*IgnoreAddressId*/ true)) 2994 return; 2995 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2996 "Target region entry already registered!"); 2997 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2998 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2999 ++OffloadingEntriesNum; 3000 } 3001 } 3002 3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3004 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3005 bool IgnoreAddressId) const { 3006 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3007 if (PerDevice == OffloadEntriesTargetRegion.end()) 3008 return false; 3009 auto PerFile = PerDevice->second.find(FileID); 3010 if (PerFile == PerDevice->second.end()) 3011 return false; 3012 auto PerParentName = PerFile->second.find(ParentName); 3013 if (PerParentName == PerFile->second.end()) 3014 return false; 3015 auto PerLine = PerParentName->second.find(LineNum); 3016 if (PerLine == PerParentName->second.end()) 3017 return false; 3018 // Fail if this entry is already registered. 3019 if (!IgnoreAddressId && 3020 (PerLine->second.getAddress() || PerLine->second.getID())) 3021 return false; 3022 return true; 3023 } 3024 3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3026 const OffloadTargetRegionEntryInfoActTy &Action) { 3027 // Scan all target region entries and perform the provided action. 
3028 for (const auto &D : OffloadEntriesTargetRegion) 3029 for (const auto &F : D.second) 3030 for (const auto &P : F.second) 3031 for (const auto &L : P.second) 3032 Action(D.first, F.first, P.first(), L.first, L.second); 3033 } 3034 3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3036 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3037 OMPTargetGlobalVarEntryKind Flags, 3038 unsigned Order) { 3039 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3040 "only required for the device " 3041 "code generation."); 3042 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3043 ++OffloadingEntriesNum; 3044 } 3045 3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3047 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3048 CharUnits VarSize, 3049 OMPTargetGlobalVarEntryKind Flags, 3050 llvm::GlobalValue::LinkageTypes Linkage) { 3051 if (CGM.getLangOpts().OpenMPIsDevice) { 3052 // This could happen if the device compilation is invoked standalone. 
3053 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3054 return; 3055 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3056 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3057 if (Entry.getVarSize().isZero()) { 3058 Entry.setVarSize(VarSize); 3059 Entry.setLinkage(Linkage); 3060 } 3061 return; 3062 } 3063 Entry.setVarSize(VarSize); 3064 Entry.setLinkage(Linkage); 3065 Entry.setAddress(Addr); 3066 } else { 3067 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3068 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3069 assert(Entry.isValid() && Entry.getFlags() == Flags && 3070 "Entry not initialized!"); 3071 if (Entry.getVarSize().isZero()) { 3072 Entry.setVarSize(VarSize); 3073 Entry.setLinkage(Linkage); 3074 } 3075 return; 3076 } 3077 OffloadEntriesDeviceGlobalVar.try_emplace( 3078 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3079 ++OffloadingEntriesNum; 3080 } 3081 } 3082 3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3084 actOnDeviceGlobalVarEntriesInfo( 3085 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3086 // Scan all target region entries and perform the provided action. 3087 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3088 Action(E.getKey(), E.getValue()); 3089 } 3090 3091 void CGOpenMPRuntime::createOffloadEntry( 3092 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3093 llvm::GlobalValue::LinkageTypes Linkage) { 3094 OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); 3095 } 3096 3097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3098 // Emit the offloading entries and metadata so that the device codegen side 3099 // can easily figure out what to emit. The produced metadata looks like 3100 // this: 3101 // 3102 // !omp_offload.info = !{!1, ...} 3103 // 3104 // Right now we only generate metadata for function that contain target 3105 // regions. 3106 3107 // If we are in simd mode or there are no entries, we don't need to do 3108 // anything. 
3109 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3110 return; 3111 3112 llvm::Module &M = CGM.getModule(); 3113 llvm::LLVMContext &C = M.getContext(); 3114 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3115 SourceLocation, StringRef>, 3116 16> 3117 OrderedEntries(OffloadEntriesInfoManager.size()); 3118 llvm::SmallVector<StringRef, 16> ParentFunctions( 3119 OffloadEntriesInfoManager.size()); 3120 3121 // Auxiliary methods to create metadata values and strings. 3122 auto &&GetMDInt = [this](unsigned V) { 3123 return llvm::ConstantAsMetadata::get( 3124 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3125 }; 3126 3127 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3128 3129 // Create the offloading info metadata node. 3130 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3131 3132 // Create function that emits metadata for each target region entry; 3133 auto &&TargetRegionMetadataEmitter = 3134 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3135 &GetMDString]( 3136 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3137 unsigned Line, 3138 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3139 // Generate metadata for target regions. Each entry of this metadata 3140 // contains: 3141 // - Entry 0 -> Kind of this type of metadata (0). 3142 // - Entry 1 -> Device ID of the file where the entry was identified. 3143 // - Entry 2 -> File ID of the file where the entry was identified. 3144 // - Entry 3 -> Mangled name of the function where the entry was 3145 // identified. 3146 // - Entry 4 -> Line in the file where the entry was identified. 3147 // - Entry 5 -> Order the entry was created. 3148 // The first element of the metadata node is the kind. 
3149 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3150 GetMDInt(FileID), GetMDString(ParentName), 3151 GetMDInt(Line), GetMDInt(E.getOrder())}; 3152 3153 SourceLocation Loc; 3154 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3155 E = CGM.getContext().getSourceManager().fileinfo_end(); 3156 I != E; ++I) { 3157 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3158 I->getFirst()->getUniqueID().getFile() == FileID) { 3159 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3160 I->getFirst(), Line, 1); 3161 break; 3162 } 3163 } 3164 // Save this entry in the right position of the ordered entries array. 3165 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3166 ParentFunctions[E.getOrder()] = ParentName; 3167 3168 // Add metadata to the named metadata node. 3169 MD->addOperand(llvm::MDNode::get(C, Ops)); 3170 }; 3171 3172 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3173 TargetRegionMetadataEmitter); 3174 3175 // Create function that emits metadata for each device global variable entry; 3176 auto &&DeviceGlobalVarMetadataEmitter = 3177 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3178 MD](StringRef MangledName, 3179 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3180 &E) { 3181 // Generate metadata for global variables. Each entry of this metadata 3182 // contains: 3183 // - Entry 0 -> Kind of this type of metadata (1). 3184 // - Entry 1 -> Mangled name of the variable. 3185 // - Entry 2 -> Declare target kind. 3186 // - Entry 3 -> Order the entry was created. 3187 // The first element of the metadata node is the kind. 3188 llvm::Metadata *Ops[] = { 3189 GetMDInt(E.getKind()), GetMDString(MangledName), 3190 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3191 3192 // Save this entry in the right position of the ordered entries array. 
3193 OrderedEntries[E.getOrder()] = 3194 std::make_tuple(&E, SourceLocation(), MangledName); 3195 3196 // Add metadata to the named metadata node. 3197 MD->addOperand(llvm::MDNode::get(C, Ops)); 3198 }; 3199 3200 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3201 DeviceGlobalVarMetadataEmitter); 3202 3203 for (const auto &E : OrderedEntries) { 3204 assert(std::get<0>(E) && "All ordered entries must exist!"); 3205 if (const auto *CE = 3206 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3207 std::get<0>(E))) { 3208 if (!CE->getID() || !CE->getAddress()) { 3209 // Do not blame the entry if the parent funtion is not emitted. 3210 StringRef FnName = ParentFunctions[CE->getOrder()]; 3211 if (!CGM.GetGlobalValue(FnName)) 3212 continue; 3213 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3214 DiagnosticsEngine::Error, 3215 "Offloading entry for target region in %0 is incorrect: either the " 3216 "address or the ID is invalid."); 3217 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3218 continue; 3219 } 3220 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3221 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3222 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3223 OffloadEntryInfoDeviceGlobalVar>( 3224 std::get<0>(E))) { 3225 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3226 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3227 CE->getFlags()); 3228 switch (Flags) { 3229 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3230 if (CGM.getLangOpts().OpenMPIsDevice && 3231 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3232 continue; 3233 if (!CE->getAddress()) { 3234 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3235 DiagnosticsEngine::Error, "Offloading entry for declare target " 3236 "variable %0 is incorrect: the " 3237 "address is invalid."); 3238 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3239 
continue; 3240 } 3241 // The vaiable has no definition - no need to add the entry. 3242 if (CE->getVarSize().isZero()) 3243 continue; 3244 break; 3245 } 3246 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3247 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3248 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3249 "Declaret target link address is set."); 3250 if (CGM.getLangOpts().OpenMPIsDevice) 3251 continue; 3252 if (!CE->getAddress()) { 3253 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3254 DiagnosticsEngine::Error, 3255 "Offloading entry for declare target variable is incorrect: the " 3256 "address is invalid."); 3257 CGM.getDiags().Report(DiagID); 3258 continue; 3259 } 3260 break; 3261 } 3262 3263 // Hidden or internal symbols on the device are not externally visible. We 3264 // should not attempt to register them by creating an offloading entry. 3265 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) 3266 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) 3267 continue; 3268 3269 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3270 CE->getVarSize().getQuantity(), Flags, 3271 CE->getLinkage()); 3272 } else { 3273 llvm_unreachable("Unsupported entry kind."); 3274 } 3275 } 3276 } 3277 3278 /// Loads all the offload entries information from the host IR 3279 /// metadata. 3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3281 // If we are in target mode, load the metadata from the host IR. This code has 3282 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3283 3284 if (!CGM.getLangOpts().OpenMPIsDevice) 3285 return; 3286 3287 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3288 return; 3289 3290 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3291 if (auto EC = Buf.getError()) { 3292 CGM.getDiags().Report(diag::err_cannot_open_file) 3293 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3294 return; 3295 } 3296 3297 llvm::LLVMContext C; 3298 auto ME = expectedToErrorOrAndEmitErrors( 3299 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3300 3301 if (auto EC = ME.getError()) { 3302 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3303 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3304 CGM.getDiags().Report(DiagID) 3305 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3306 return; 3307 } 3308 3309 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3310 if (!MD) 3311 return; 3312 3313 for (llvm::MDNode *MN : MD->operands()) { 3314 auto &&GetMDInt = [MN](unsigned Idx) { 3315 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3316 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3317 }; 3318 3319 auto &&GetMDString = [MN](unsigned Idx) { 3320 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3321 return V->getString(); 3322 }; 3323 3324 switch (GetMDInt(0)) { 3325 default: 3326 llvm_unreachable("Unexpected metadata!"); 3327 break; 3328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3329 OffloadingEntryInfoTargetRegion: 3330 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3331 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3332 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3333 /*Order=*/GetMDInt(5)); 3334 break; 3335 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3336 OffloadingEntryInfoDeviceGlobalVar: 3337 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3338 /*MangledName=*/GetMDString(1), 3339 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3340 /*Flags=*/GetMDInt(2)), 3341 /*Order=*/GetMDInt(3)); 3342 break; 3343 } 3344 } 3345 } 3346 3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3348 if (!KmpRoutineEntryPtrTy) { 3349 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3350 ASTContext &C = CGM.getContext(); 3351 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3352 FunctionProtoType::ExtProtoInfo EPI; 3353 KmpRoutineEntryPtrQTy = C.getPointerType( 3354 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3355 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3356 } 3357 } 3358 3359 namespace { 3360 struct PrivateHelpersTy { 3361 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3362 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3363 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3364 PrivateElemInit(PrivateElemInit) {} 3365 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3366 const Expr *OriginalRef = nullptr; 3367 const VarDecl *Original = nullptr; 3368 const VarDecl *PrivateCopy = nullptr; 3369 const VarDecl *PrivateElemInit = nullptr; 3370 bool isLocalPrivate() const { 3371 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3372 } 3373 }; 3374 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3375 } // anonymous namespace 3376 3377 static bool isAllocatableDecl(const VarDecl *VD) { 3378 const VarDecl *CVD = VD->getCanonicalDecl(); 3379 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3380 return false; 3381 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3382 // Use the default allocation. 
3383 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3384 !AA->getAllocator()); 3385 } 3386 3387 static RecordDecl * 3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3389 if (!Privates.empty()) { 3390 ASTContext &C = CGM.getContext(); 3391 // Build struct .kmp_privates_t. { 3392 // /* private vars */ 3393 // }; 3394 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3395 RD->startDefinition(); 3396 for (const auto &Pair : Privates) { 3397 const VarDecl *VD = Pair.second.Original; 3398 QualType Type = VD->getType().getNonReferenceType(); 3399 // If the private variable is a local variable with lvalue ref type, 3400 // allocate the pointer instead of the pointee type. 3401 if (Pair.second.isLocalPrivate()) { 3402 if (VD->getType()->isLValueReferenceType()) 3403 Type = C.getPointerType(Type); 3404 if (isAllocatableDecl(VD)) 3405 Type = C.getPointerType(Type); 3406 } 3407 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3408 if (VD->hasAttrs()) { 3409 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3410 E(VD->getAttrs().end()); 3411 I != E; ++I) 3412 FD->addAttr(*I); 3413 } 3414 } 3415 RD->completeDefinition(); 3416 return RD; 3417 } 3418 return nullptr; 3419 } 3420 3421 static RecordDecl * 3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3423 QualType KmpInt32Ty, 3424 QualType KmpRoutineEntryPointerQTy) { 3425 ASTContext &C = CGM.getContext(); 3426 // Build struct kmp_task_t { 3427 // void * shareds; 3428 // kmp_routine_entry_t routine; 3429 // kmp_int32 part_id; 3430 // kmp_cmplrdata_t data1; 3431 // kmp_cmplrdata_t data2; 3432 // For taskloops additional fields: 3433 // kmp_uint64 lb; 3434 // kmp_uint64 ub; 3435 // kmp_int64 st; 3436 // kmp_int32 liter; 3437 // void * reductions; 3438 // }; 3439 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3440 UD->startDefinition(); 3441 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3442 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3443 UD->completeDefinition(); 3444 QualType KmpCmplrdataTy = C.getRecordType(UD); 3445 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3446 RD->startDefinition(); 3447 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3448 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3449 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3451 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3452 if (isOpenMPTaskLoopDirective(Kind)) { 3453 QualType KmpUInt64Ty = 3454 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3455 QualType KmpInt64Ty = 3456 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3458 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3459 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3460 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3461 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3462 } 3463 RD->completeDefinition(); 3464 return RD; 3465 } 3466 3467 static RecordDecl * 3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3469 ArrayRef<PrivateDataTy> Privates) { 3470 ASTContext &C = CGM.getContext(); 3471 // Build struct kmp_task_t_with_privates { 3472 // kmp_task_t task_data; 3473 // .kmp_privates_t. privates; 3474 // }; 3475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3476 RD->startDefinition(); 3477 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3478 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3479 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3480 RD->completeDefinition(); 3481 return RD; 3482 } 3483 3484 /// Emit a proxy function which accepts kmp_task_t as the second 3485 /// argument. 
3486 /// \code 3487 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3488 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3489 /// For taskloops: 3490 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3491 /// tt->reductions, tt->shareds); 3492 /// return 0; 3493 /// } 3494 /// \endcode 3495 static llvm::Function * 3496 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3497 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3498 QualType KmpTaskTWithPrivatesPtrQTy, 3499 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3500 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3501 llvm::Value *TaskPrivatesMap) { 3502 ASTContext &C = CGM.getContext(); 3503 FunctionArgList Args; 3504 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3505 ImplicitParamDecl::Other); 3506 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3507 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3508 ImplicitParamDecl::Other); 3509 Args.push_back(&GtidArg); 3510 Args.push_back(&TaskTypeArg); 3511 const auto &TaskEntryFnInfo = 3512 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3513 llvm::FunctionType *TaskEntryTy = 3514 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3515 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3516 auto *TaskEntry = llvm::Function::Create( 3517 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3518 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3519 TaskEntry->setDoesNotRecurse(); 3520 CodeGenFunction CGF(CGM); 3521 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3522 Loc, Loc); 3523 3524 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3525 // tt, 3526 // For taskloops: 3527 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3528 // 
tt->task_data.shareds); 3529 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3530 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3531 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3532 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3533 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3534 const auto *KmpTaskTWithPrivatesQTyRD = 3535 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3536 LValue Base = 3537 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3538 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3539 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3540 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3541 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3542 3543 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3544 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3545 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3546 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3547 CGF.ConvertTypeForMem(SharedsPtrTy)); 3548 3549 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3550 llvm::Value *PrivatesParam; 3551 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3552 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3553 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3554 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3555 } else { 3556 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3557 } 3558 3559 llvm::Value *CommonArgs[] = { 3560 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3561 CGF.Builder 3562 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3563 CGF.VoidPtrTy, CGF.Int8Ty) 3564 .getPointer()}; 3565 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3566 std::end(CommonArgs)); 3567 if (isOpenMPTaskLoopDirective(Kind)) { 3568 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3569 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3570 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3571 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3572 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3573 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3574 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3575 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3576 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3577 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3578 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3579 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3580 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3581 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3582 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3583 CallArgs.push_back(LBParam); 3584 CallArgs.push_back(UBParam); 3585 CallArgs.push_back(StParam); 3586 CallArgs.push_back(LIParam); 3587 CallArgs.push_back(RParam); 3588 } 3589 CallArgs.push_back(SharedsParam); 3590 3591 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3592 CallArgs); 3593 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3594 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3595 CGF.FinishFunction(); 3596 return TaskEntry; 3597 } 3598 3599 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3600 SourceLocation Loc, 3601 QualType KmpInt32Ty, 3602 QualType KmpTaskTWithPrivatesPtrQTy, 3603 QualType KmpTaskTWithPrivatesQTy) { 3604 ASTContext &C = CGM.getContext(); 3605 FunctionArgList Args; 3606 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3607 ImplicitParamDecl::Other); 3608 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3609 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3610 
ImplicitParamDecl::Other); 3611 Args.push_back(&GtidArg); 3612 Args.push_back(&TaskTypeArg); 3613 const auto &DestructorFnInfo = 3614 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3615 llvm::FunctionType *DestructorFnTy = 3616 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3617 std::string Name = 3618 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3619 auto *DestructorFn = 3620 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3621 Name, &CGM.getModule()); 3622 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3623 DestructorFnInfo); 3624 DestructorFn->setDoesNotRecurse(); 3625 CodeGenFunction CGF(CGM); 3626 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3627 Args, Loc, Loc); 3628 3629 LValue Base = CGF.EmitLoadOfPointerLValue( 3630 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3631 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3632 const auto *KmpTaskTWithPrivatesQTyRD = 3633 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3634 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3635 Base = CGF.EmitLValueForField(Base, *FI); 3636 for (const auto *Field : 3637 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3638 if (QualType::DestructionKind DtorKind = 3639 Field->getType().isDestructedType()) { 3640 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3641 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3642 } 3643 } 3644 CGF.FinishFunction(); 3645 return DestructorFn; 3646 } 3647 3648 /// Emit a privates mapping function for correct handling of private and 3649 /// firstprivate variables. 3650 /// \code 3651 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3652 /// **noalias priv1,..., <tyn> **noalias privn) { 3653 /// *priv1 = &.privates.priv1; 3654 /// ...; 3655 /// *privn = &.privates.privn; 3656 /// } 3657 /// \endcode 3658 static llvm::Value * 3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3660 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3661 ArrayRef<PrivateDataTy> Privates) { 3662 ASTContext &C = CGM.getContext(); 3663 FunctionArgList Args; 3664 ImplicitParamDecl TaskPrivatesArg( 3665 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3666 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3667 ImplicitParamDecl::Other); 3668 Args.push_back(&TaskPrivatesArg); 3669 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3670 unsigned Counter = 1; 3671 for (const Expr *E : Data.PrivateVars) { 3672 Args.push_back(ImplicitParamDecl::Create( 3673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3674 C.getPointerType(C.getPointerType(E->getType())) 3675 .withConst() 3676 .withRestrict(), 3677 ImplicitParamDecl::Other)); 3678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3679 PrivateVarsPos[VD] = Counter; 3680 ++Counter; 3681 } 3682 for (const Expr *E : Data.FirstprivateVars) { 3683 Args.push_back(ImplicitParamDecl::Create( 3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3685 C.getPointerType(C.getPointerType(E->getType())) 3686 .withConst() 3687 .withRestrict(), 3688 ImplicitParamDecl::Other)); 3689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3690 PrivateVarsPos[VD] = Counter; 3691 ++Counter; 3692 } 3693 for (const Expr *E : Data.LastprivateVars) { 3694 Args.push_back(ImplicitParamDecl::Create( 3695 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3696 C.getPointerType(C.getPointerType(E->getType())) 3697 .withConst() 3698 .withRestrict(), 3699 ImplicitParamDecl::Other)); 3700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3701 PrivateVarsPos[VD] = Counter; 3702 ++Counter; 3703 } 3704 for (const VarDecl *VD : 
Data.PrivateLocals) { 3705 QualType Ty = VD->getType().getNonReferenceType(); 3706 if (VD->getType()->isLValueReferenceType()) 3707 Ty = C.getPointerType(Ty); 3708 if (isAllocatableDecl(VD)) 3709 Ty = C.getPointerType(Ty); 3710 Args.push_back(ImplicitParamDecl::Create( 3711 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3712 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3713 ImplicitParamDecl::Other)); 3714 PrivateVarsPos[VD] = Counter; 3715 ++Counter; 3716 } 3717 const auto &TaskPrivatesMapFnInfo = 3718 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3719 llvm::FunctionType *TaskPrivatesMapTy = 3720 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3721 std::string Name = 3722 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3723 auto *TaskPrivatesMap = llvm::Function::Create( 3724 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3725 &CGM.getModule()); 3726 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3727 TaskPrivatesMapFnInfo); 3728 if (CGM.getLangOpts().Optimize) { 3729 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3730 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3731 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3732 } 3733 CodeGenFunction CGF(CGM); 3734 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3735 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3736 3737 // *privi = &.privates.privi; 3738 LValue Base = CGF.EmitLoadOfPointerLValue( 3739 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3740 TaskPrivatesArg.getType()->castAs<PointerType>()); 3741 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3742 Counter = 0; 3743 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3744 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3745 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3746 LValue RefLVal = 3747 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3748 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3749 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3750 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3751 ++Counter; 3752 } 3753 CGF.FinishFunction(); 3754 return TaskPrivatesMap; 3755 } 3756 3757 /// Emit initialization for private variables in task-based directives. 3758 static void emitPrivatesInit(CodeGenFunction &CGF, 3759 const OMPExecutableDirective &D, 3760 Address KmpTaskSharedsPtr, LValue TDBase, 3761 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3762 QualType SharedsTy, QualType SharedsPtrTy, 3763 const OMPTaskDataTy &Data, 3764 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3765 ASTContext &C = CGF.getContext(); 3766 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3767 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3768 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3769 ? OMPD_taskloop 3770 : OMPD_task; 3771 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3772 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3773 LValue SrcBase; 3774 bool IsTargetTask = 3775 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3776 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3777 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3778 // PointersArray, SizesArray, and MappersArray. The original variables for 3779 // these arrays are not captured and we get their addresses explicitly. 
3780 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3781 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3782 SrcBase = CGF.MakeAddrLValue( 3783 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3784 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3785 CGF.ConvertTypeForMem(SharedsTy)), 3786 SharedsTy); 3787 } 3788 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3789 for (const PrivateDataTy &Pair : Privates) { 3790 // Do not initialize private locals. 3791 if (Pair.second.isLocalPrivate()) { 3792 ++FI; 3793 continue; 3794 } 3795 const VarDecl *VD = Pair.second.PrivateCopy; 3796 const Expr *Init = VD->getAnyInitializer(); 3797 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3798 !CGF.isTrivialInitializer(Init)))) { 3799 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3800 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3801 const VarDecl *OriginalVD = Pair.second.Original; 3802 // Check if the variable is the target-based BasePointersArray, 3803 // PointersArray, SizesArray, or MappersArray. 
3804 LValue SharedRefLValue; 3805 QualType Type = PrivateLValue.getType(); 3806 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3807 if (IsTargetTask && !SharedField) { 3808 assert(isa<ImplicitParamDecl>(OriginalVD) && 3809 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3810 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3811 ->getNumParams() == 0 && 3812 isa<TranslationUnitDecl>( 3813 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3814 ->getDeclContext()) && 3815 "Expected artificial target data variable."); 3816 SharedRefLValue = 3817 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3818 } else if (ForDup) { 3819 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3820 SharedRefLValue = CGF.MakeAddrLValue( 3821 SharedRefLValue.getAddress(CGF).withAlignment( 3822 C.getDeclAlign(OriginalVD)), 3823 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3824 SharedRefLValue.getTBAAInfo()); 3825 } else if (CGF.LambdaCaptureFields.count( 3826 Pair.second.Original->getCanonicalDecl()) > 0 || 3827 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3828 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3829 } else { 3830 // Processing for implicitly captured variables. 3831 InlinedOpenMPRegionRAII Region( 3832 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3833 /*HasCancel=*/false, /*NoInheritance=*/true); 3834 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3835 } 3836 if (Type->isArrayType()) { 3837 // Initialize firstprivate array. 3838 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3839 // Perform simple memcpy. 3840 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3841 } else { 3842 // Initialize firstprivate array using element-by-element 3843 // initialization. 
3844 CGF.EmitOMPAggregateAssign( 3845 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3846 Type, 3847 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3848 Address SrcElement) { 3849 // Clean up any temporaries needed by the initialization. 3850 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3851 InitScope.addPrivate(Elem, SrcElement); 3852 (void)InitScope.Privatize(); 3853 // Emit initialization for single element. 3854 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3855 CGF, &CapturesInfo); 3856 CGF.EmitAnyExprToMem(Init, DestElement, 3857 Init->getType().getQualifiers(), 3858 /*IsInitializer=*/false); 3859 }); 3860 } 3861 } else { 3862 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3863 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3864 (void)InitScope.Privatize(); 3865 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3866 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3867 /*capturedByInit=*/false); 3868 } 3869 } else { 3870 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3871 } 3872 } 3873 ++FI; 3874 } 3875 } 3876 3877 /// Check if duplication function is required for taskloops. 
3878 static bool checkInitIsRequired(CodeGenFunction &CGF, 3879 ArrayRef<PrivateDataTy> Privates) { 3880 bool InitRequired = false; 3881 for (const PrivateDataTy &Pair : Privates) { 3882 if (Pair.second.isLocalPrivate()) 3883 continue; 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)); 3888 if (InitRequired) 3889 break; 3890 } 3891 return InitRequired; 3892 } 3893 3894 3895 /// Emit task_dup function (for initialization of 3896 /// private/firstprivate/lastprivate vars and last_iter flag) 3897 /// \code 3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3899 /// lastpriv) { 3900 /// // setup lastprivate flag 3901 /// task_dst->last = lastpriv; 3902 /// // could be constructor calls here... 3903 /// } 3904 /// \endcode 3905 static llvm::Value * 3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3907 const OMPExecutableDirective &D, 3908 QualType KmpTaskTWithPrivatesPtrQTy, 3909 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3910 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3911 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3912 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3913 ASTContext &C = CGM.getContext(); 3914 FunctionArgList Args; 3915 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3916 KmpTaskTWithPrivatesPtrQTy, 3917 ImplicitParamDecl::Other); 3918 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3919 KmpTaskTWithPrivatesPtrQTy, 3920 ImplicitParamDecl::Other); 3921 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3922 ImplicitParamDecl::Other); 3923 Args.push_back(&DstArg); 3924 Args.push_back(&SrcArg); 3925 Args.push_back(&LastprivArg); 3926 const auto &TaskDupFnInfo = 3927 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3928 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3929 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3930 auto *TaskDup = llvm::Function::Create( 3931 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3932 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3933 TaskDup->setDoesNotRecurse(); 3934 CodeGenFunction CGF(CGM); 3935 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3936 Loc); 3937 3938 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3939 CGF.GetAddrOfLocalVar(&DstArg), 3940 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3941 // task_dst->liter = lastpriv; 3942 if (WithLastIter) { 3943 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3944 LValue Base = CGF.EmitLValueForField( 3945 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3946 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3947 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3948 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3949 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3950 } 3951 3952 // Emit initial values for private copies (if any). 
3953 assert(!Privates.empty()); 3954 Address KmpTaskSharedsPtr = Address::invalid(); 3955 if (!Data.FirstprivateVars.empty()) { 3956 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3957 CGF.GetAddrOfLocalVar(&SrcArg), 3958 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3959 LValue Base = CGF.EmitLValueForField( 3960 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3961 KmpTaskSharedsPtr = Address( 3962 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3963 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3964 KmpTaskTShareds)), 3965 Loc), 3966 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3967 } 3968 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3969 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3970 CGF.FinishFunction(); 3971 return TaskDup; 3972 } 3973 3974 /// Checks if destructor function is required to be generated. 3975 /// \return true if cleanups are required, false otherwise. 3976 static bool 3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3978 ArrayRef<PrivateDataTy> Privates) { 3979 for (const PrivateDataTy &P : Privates) { 3980 if (P.second.isLocalPrivate()) 3981 continue; 3982 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3983 if (Ty.isDestructedType()) 3984 return true; 3985 } 3986 return false; 3987 } 3988 3989 namespace { 3990 /// Loop generator for OpenMP iterator expression. 
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator, in declaration order; the
  // destructor consumes them in reverse to close the nested loops.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator variables and their hidden counters, then emits
  /// the header (init + condition + body entry) of one loop per iterator.
  /// A null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound once, before the loops are opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed vs. unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the latch (counter increment + back-branch) and exit block of each
  /// loop, innermost first, mirroring the headers emitted in the constructor.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the address of \p E's data and the size of that data in bytes.
/// For an array-shaping expression the size is the product of the element
/// size and every dimension; for an array section it is the byte distance
/// between the lower and (one past) the upper bound; otherwise it is simply
/// sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(elem) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // size = (&section_end + 1) - &section_begin, in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrAddress.getElementType(),
                                       UpAddrAddress.getPointer(),
                                       /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet.
/// The record has three fields: base address (intptr_t), length (size_t) and
/// a 32-bit unsigned flags word.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the setup for a task region: gathers and sorts the private copies,
/// builds the task-specific kmp_task_t-with-privates record, emits the proxy
/// entry and private-mapping helpers, allocates the task via
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for nowait target
/// tasks), handles detach/affinity clauses, copies shareds, initializes
/// privates, and wires up destructor/priority data.
/// \return A TaskResultTy bundling the new task, its entry point and the
/// lvalues/types callers need to emit the actual task launch.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    // Firstprivates additionally carry the per-element initializer decl.
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable decls are stored as pointers into the private block.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Stable descending sort by alignment keeps the privates record compact
  // while preserving declaration order among equal alignments.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives get a distinct cached record from plain task/target
  // directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a compile-time constant (the Int side) or a runtime
  // condition (the Pointer side), in which case a select is emitted.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // An iterator modifier multiplies the element count by every
        // iterator's upper bound, which is only known at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA for the affinity array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized count: a plain constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-modified clauses advance a runtime counter instead of the
      // compile-time Pos; seed it with the elements already written.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk re-initialization also get a 'dup'
    // helper for copying privates into duplicated task descriptors.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = DepInOutSet;
    break;
  // The remaining kinds never reach this translation; they are handled (or
  // rejected) before dependency records are emitted.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The record has three fields: base address (intptr_t), length (size_t) and
/// a bool-sized unsigned flags word (returned through \p FlagsTy).
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Reads a depobj variable and returns the number of dependence records it
/// holds together with an lvalue for the start of its kmp_depend_info array.
/// The element count is stashed in the base_addr field of the record at
/// index -1, immediately before the array proper.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // The depobj variable stores a pointer to the kmp_depend_info array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to reach the hidden count slot.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info records for every dependence expression in \p Data,
/// writing them into \p DependenciesArray starting at \p Pos. \p Pos is
/// either a compile-time index (unsigned*) that is bumped in place, or a
/// runtime counter lvalue (LValue*) that is loaded/incremented per element —
/// the latter is used when the clause carries an iterator modifier.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens iterator loops around the writes when an iterator modifier is
  // present; a null expression makes this scope a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(Data.IteratorExpr ?
                                             Data.IteratorExpr
                                                 ->IgnoreParenImpCasts()
                                                         : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime counter: load the current index first.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position — in place for the unsigned index, via
    // load/add/store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a depobj-kind dependence clause, returns one size value per dependence
/// expression: the number of kmp_depend_info records stored in that depobj.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // The temporaries are written inside the iterator loops (if any) so the
    // loads below observe the final accumulated values.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // NOTE(review): the store-0 / load / NUW-add sequence below is
      // equivalent to storing NumDeps directly; kept as-is since this is a
      // documentation-only pass.
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated sizes back after the iterator scope has closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the kmp_depend_info records out of every depobj in \p Data into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of records copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts() 4691 : nullptr)); 4692 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4693 const Expr *E = Data.DepExprs[I]; 4694 llvm::Value *NumDeps; 4695 LValue Base; 4696 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4697 std::tie(NumDeps, Base) = 4698 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4699 4700 // memcopy dependency data. 4701 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4702 ElSize, 4703 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4704 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4705 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4706 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4707 4708 // Increase pos. 4709 // pos += size; 4710 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4711 CGF.EmitStoreOfScalar(Add, PosLVal); 4712 } 4713 } 4714 } 4715 4716 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4717 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4718 SourceLocation Loc) { 4719 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4720 return D.DepExprs.empty(); 4721 })) 4722 return std::make_pair(nullptr, Address::invalid()); 4723 // Process list of dependencies. 4724 ASTContext &C = CGM.getContext(); 4725 Address DependenciesArray = Address::invalid(); 4726 llvm::Value *NumOfElements = nullptr; 4727 unsigned NumDependencies = std::accumulate( 4728 Dependencies.begin(), Dependencies.end(), 0, 4729 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4730 return D.DepKind == OMPC_DEPEND_depobj 4731 ? V 4732 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4733 }); 4734 QualType FlagsTy; 4735 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4736 bool HasDepobjDeps = false; 4737 bool HasRegularWithIterators = false; 4738 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4739 llvm::Value *NumOfRegularWithIterators = 4740 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4741 // Calculate number of depobj dependecies and regular deps with the iterators. 4742 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4743 if (D.DepKind == OMPC_DEPEND_depobj) { 4744 SmallVector<llvm::Value *, 4> Sizes = 4745 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4746 for (llvm::Value *Size : Sizes) { 4747 NumOfDepobjElements = 4748 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4749 } 4750 HasDepobjDeps = true; 4751 continue; 4752 } 4753 // Include number of iterations, if any. 4754 4755 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4756 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4757 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4758 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4759 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4760 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4761 NumOfRegularWithIterators = 4762 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4763 } 4764 HasRegularWithIterators = true; 4765 continue; 4766 } 4767 } 4768 4769 QualType KmpDependInfoArrayTy; 4770 if (HasDepobjDeps || HasRegularWithIterators) { 4771 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4772 /*isSigned=*/false); 4773 if (HasDepobjDeps) { 4774 NumOfElements = 4775 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4776 } 4777 if (HasRegularWithIterators) { 4778 NumOfElements = 4779 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4780 } 4781 auto *OVE = new (C) OpaqueValueExpr( 4782 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4783 VK_PRValue); 4784 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4785 RValue::get(NumOfElements)); 4786 KmpDependInfoArrayTy = 4787 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4788 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4789 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4790 // Properly emit variable-sized array. 4791 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4792 ImplicitParamDecl::Other); 4793 CGF.EmitVarDecl(*PD); 4794 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4795 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4796 /*isSigned=*/false); 4797 } else { 4798 KmpDependInfoArrayTy = C.getConstantArrayType( 4799 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4800 ArrayType::Normal, /*IndexTypeQuals=*/0); 4801 DependenciesArray = 4802 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4803 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4804 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4805 /*isSigned=*/false); 4806 } 4807 unsigned Pos = 0; 4808 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4809 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4810 Dependencies[I].IteratorExpr) 4811 continue; 4812 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4813 DependenciesArray); 4814 } 4815 // Copy regular dependecies with iterators. 
  // Regular dependences with iterators cannot use a compile-time slot index;
  // keep the running position in a memory temporary instead.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the dependence list for a 'depobj' construct: heap-allocates (via
/// __kmpc_alloc) a kmp_depend_info array with one extra leading element that
/// records the number of dependences (needed later by depobj(x) update(in)
/// and destroy), fills it, and returns the address of the first real element.
/// Returns an invalid Address when the clause carries no dependences.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator the element count is a runtime product of all iterator
    // trip counts; compute it and the allocation size dynamically.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading element that stores the dependence count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // No iterator: the count is a compile-time constant (+1 for the header).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position is either a constant index (no iterator) or a runtime counter
  // in memory (iterator present).
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Skip the header element: callers get a pointer to the first dependence.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

/// Emits code for the 'destroy' clause of a depobj directive: frees the
/// heap-allocated dependence array via __kmpc_free. The stored pointer points
/// one element past the count header, hence the GEP by -1 below.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy
      = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back over the count header to recover the original allocation base.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emits code for the 'update' clause of a depobj directive: loops over every
/// kmp_depend_info element stored in the depobj and rewrites its 'flags'
/// field to the new dependence kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits code for a task directive: allocates and initializes the task via
/// emitTaskInit, then issues either an immediate __kmpc_omp_task[_with_deps]
/// call (then-branch) or, under a false 'if' clause, the serialized
/// wait_deps/begin_if0/entry/complete_if0 sequence (else-branch).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: the 'if' clause is true (or absent) — enqueue the task with
  // __kmpc_omp_task[_with_deps].
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part_id; reset it before submission.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch: the 'if' clause is false — execute the task body serially in
  // the encountering thread, still honoring dependences.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits code for a taskloop directive: initializes the task, stores the
/// loop bounds/stride and reduction data into the task record, and calls
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  // Skip the whole loop when the array is empty.
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so the
  // generated reduction operation applies to them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A user-defined reduction appears as a call whose callee is an opaque
  // value wrapping a DeclRefExpr to an OMPDeclareReductionDecl; in that case
  // bind the opaque callee to the UDR combiner function before emission.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined reduction function passed to __kmpc_reduce: it receives
/// two void* arrays (lhs/rhs element pointers) and applies each reduction
/// operation element-wise, handling array sections and VLA-typed privates.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to the corresponding slot of the argument
  // arrays. Idx can run ahead of I because VLA privates occupy an extra slot
  // holding the array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits one reduction combination, dispatching to the element-wise aggregate
/// helper for array-typed privates and to the plain combiner otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the full reduction sequence for a directive: either the simple
/// inline combiners (SimpleReduction), or the __kmpc_reduce{_nowait} switch
/// protocol with tree (case 1) and atomic (case 2) combination paths.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  // ...
  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  // *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  // ...
5461 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5462 // ... 5463 5464 ASTContext &C = CGM.getContext(); 5465 5466 if (SimpleReduction) { 5467 CodeGenFunction::RunCleanupsScope Scope(CGF); 5468 const auto *IPriv = Privates.begin(); 5469 const auto *ILHS = LHSExprs.begin(); 5470 const auto *IRHS = RHSExprs.begin(); 5471 for (const Expr *E : ReductionOps) { 5472 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5473 cast<DeclRefExpr>(*IRHS)); 5474 ++IPriv; 5475 ++ILHS; 5476 ++IRHS; 5477 } 5478 return; 5479 } 5480 5481 // 1. Build a list of reduction variables. 5482 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5483 auto Size = RHSExprs.size(); 5484 for (const Expr *E : Privates) { 5485 if (E->getType()->isVariablyModifiedType()) 5486 // Reserve place for array size. 5487 ++Size; 5488 } 5489 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5490 QualType ReductionArrayTy = 5491 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5492 /*IndexTypeQuals=*/0); 5493 Address ReductionList = 5494 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5495 const auto *IPriv = Privates.begin(); 5496 unsigned Idx = 0; 5497 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5498 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5499 CGF.Builder.CreateStore( 5500 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5501 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5502 Elem); 5503 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5504 // Store array size. 
5505 ++Idx; 5506 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5507 llvm::Value *Size = CGF.Builder.CreateIntCast( 5508 CGF.getVLASize( 5509 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5510 .NumElts, 5511 CGF.SizeTy, /*isSigned=*/false); 5512 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5513 Elem); 5514 } 5515 } 5516 5517 // 2. Emit reduce_func(). 5518 llvm::Function *ReductionFn = 5519 emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5520 Privates, LHSExprs, RHSExprs, ReductionOps); 5521 5522 // 3. Create static kmp_critical_name lock = { 0 }; 5523 std::string Name = getName({"reduction"}); 5524 llvm::Value *Lock = getCriticalRegionLock(Name); 5525 5526 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5527 // RedList, reduce_func, &<lock>); 5528 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5529 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5530 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5531 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5532 ReductionList.getPointer(), CGF.VoidPtrTy); 5533 llvm::Value *Args[] = { 5534 IdentTLoc, // ident_t *<loc> 5535 ThreadId, // i32 <gtid> 5536 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5537 ReductionArrayTySize, // size_type sizeof(RedList) 5538 RL, // void *RedList 5539 ReductionFn, // void (*) (void *, void *) <reduce_func> 5540 Lock // kmp_critical_name *&<lock> 5541 }; 5542 llvm::Value *Res = CGF.EmitRuntimeCall( 5543 OMPBuilder.getOrCreateRuntimeFunction( 5544 CGM.getModule(), 5545 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5546 Args); 5547 5548 // 5. Build switch(res) 5549 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5550 llvm::SwitchInst *SwInst = 5551 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5552 5553 // 6. Build case 1: 5554 // ... 
5555 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5556 // ... 5557 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5558 // break; 5559 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5560 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5561 CGF.EmitBlock(Case1BB); 5562 5563 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5564 llvm::Value *EndArgs[] = { 5565 IdentTLoc, // ident_t *<loc> 5566 ThreadId, // i32 <gtid> 5567 Lock // kmp_critical_name *&<lock> 5568 }; 5569 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5570 CodeGenFunction &CGF, PrePostActionTy &Action) { 5571 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5572 const auto *IPriv = Privates.begin(); 5573 const auto *ILHS = LHSExprs.begin(); 5574 const auto *IRHS = RHSExprs.begin(); 5575 for (const Expr *E : ReductionOps) { 5576 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5577 cast<DeclRefExpr>(*IRHS)); 5578 ++IPriv; 5579 ++ILHS; 5580 ++IRHS; 5581 } 5582 }; 5583 RegionCodeGenTy RCG(CodeGen); 5584 CommonActionTy Action( 5585 nullptr, llvm::None, 5586 OMPBuilder.getOrCreateRuntimeFunction( 5587 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5588 : OMPRTL___kmpc_end_reduce), 5589 EndArgs); 5590 RCG.setAction(Action); 5591 RCG(CGF); 5592 5593 CGF.EmitBranch(DefaultBB); 5594 5595 // 7. Build case 2: 5596 // ... 5597 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5598 // ... 
5599 // break; 5600 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5601 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5602 CGF.EmitBlock(Case2BB); 5603 5604 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5605 CodeGenFunction &CGF, PrePostActionTy &Action) { 5606 const auto *ILHS = LHSExprs.begin(); 5607 const auto *IRHS = RHSExprs.begin(); 5608 const auto *IPriv = Privates.begin(); 5609 for (const Expr *E : ReductionOps) { 5610 const Expr *XExpr = nullptr; 5611 const Expr *EExpr = nullptr; 5612 const Expr *UpExpr = nullptr; 5613 BinaryOperatorKind BO = BO_Comma; 5614 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5615 if (BO->getOpcode() == BO_Assign) { 5616 XExpr = BO->getLHS(); 5617 UpExpr = BO->getRHS(); 5618 } 5619 } 5620 // Try to emit update expression as a simple atomic. 5621 const Expr *RHSExpr = UpExpr; 5622 if (RHSExpr) { 5623 // Analyze RHS part of the whole expression. 5624 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5625 RHSExpr->IgnoreParenImpCasts())) { 5626 // If this is a conditional operator, analyze its condition for 5627 // min/max reduction operator. 
5628 RHSExpr = ACO->getCond(); 5629 } 5630 if (const auto *BORHS = 5631 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5632 EExpr = BORHS->getRHS(); 5633 BO = BORHS->getOpcode(); 5634 } 5635 } 5636 if (XExpr) { 5637 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5638 auto &&AtomicRedGen = [BO, VD, 5639 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5640 const Expr *EExpr, const Expr *UpExpr) { 5641 LValue X = CGF.EmitLValue(XExpr); 5642 RValue E; 5643 if (EExpr) 5644 E = CGF.EmitAnyExpr(EExpr); 5645 CGF.EmitOMPAtomicSimpleUpdateExpr( 5646 X, E, BO, /*IsXLHSInRHSPart=*/true, 5647 llvm::AtomicOrdering::Monotonic, Loc, 5648 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5649 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5650 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5651 CGF.emitOMPSimpleStore( 5652 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5653 VD->getType().getNonReferenceType(), Loc); 5654 PrivateScope.addPrivate(VD, LHSTemp); 5655 (void)PrivateScope.Privatize(); 5656 return CGF.EmitAnyExpr(UpExpr); 5657 }); 5658 }; 5659 if ((*IPriv)->getType()->isArrayType()) { 5660 // Emit atomic reduction for array section. 5661 const auto *RHSVar = 5662 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5663 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5664 AtomicRedGen, XExpr, EExpr, UpExpr); 5665 } else { 5666 // Emit atomic reduction for array subscript or single variable. 5667 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5668 } 5669 } else { 5670 // Emit as a critical region. 
5671 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5672 const Expr *, const Expr *) { 5673 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5674 std::string Name = RT.getName({"atomic_reduction"}); 5675 RT.emitCriticalRegion( 5676 CGF, Name, 5677 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5678 Action.Enter(CGF); 5679 emitReductionCombiner(CGF, E); 5680 }, 5681 Loc); 5682 }; 5683 if ((*IPriv)->getType()->isArrayType()) { 5684 const auto *LHSVar = 5685 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5686 const auto *RHSVar = 5687 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5688 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5689 CritRedGen); 5690 } else { 5691 CritRedGen(CGF, nullptr, nullptr, nullptr); 5692 } 5693 } 5694 ++ILHS; 5695 ++IRHS; 5696 ++IPriv; 5697 } 5698 }; 5699 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5700 if (!WithNowait) { 5701 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5702 llvm::Value *EndArgs[] = { 5703 IdentTLoc, // ident_t *<loc> 5704 ThreadId, // i32 <gtid> 5705 Lock // kmp_critical_name *&<lock> 5706 }; 5707 CommonActionTy Action(nullptr, llvm::None, 5708 OMPBuilder.getOrCreateRuntimeFunction( 5709 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5710 EndArgs); 5711 AtomicRCG.setAction(Action); 5712 AtomicRCG(CGF); 5713 } else { 5714 AtomicRCG(CGF); 5715 } 5716 5717 CGF.EmitBranch(DefaultBB); 5718 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5719 } 5720 5721 /// Generates unique name for artificial threadprivate variables. 5722 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  // Strip away array-section/subscript wrappers to find the underlying decl;
  // fall back to treating Ref itself as a plain DeclRefExpr.
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/params use the plain spelling; globals use the mangled name so the
  // generated name stays unique across translation units.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // The raw source-location encoding disambiguates same-named decls in
  // different scopes.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both arguments are restrict-qualified void* (they never alias).
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points to the private copy; cast it to the item's real type.
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  // The trailing callback returns false — default-initialization is not
  // requested here (see ReductionCodeGen::emitInitialization).
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static
llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                    SourceLocation Loc,
                                    ReductionCodeGen &RCG, unsigned N,
                                    const Expr *ReductionOp,
                                    const Expr *LHS, const Expr *RHS,
                                    const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // arg0 is in/out (accumulator), arg1 is read-only input.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups at all.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits initialization code for task reductions: builds an on-stack array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/orig addresses, size, and generated init/fini/comb helpers) and
/// passes it to __kmpc_taskred_modifier_init or __kmpc_taskred_init.
/// Returns the taskgroup data pointer produced by the runtime call, or
/// nullptr when there is no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null if the item needs no cleanups)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (flag 1 marks delayed creation for VLAs/sections)
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
                                              1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the finalization call that closes a task reduction region started
/// with the taskred modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the (dynamic) size of reduction item N into its artificial
/// threadprivate "reduction_size" variable so the generated
/// init/comb/fini helpers can read it back.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Asks the runtime for the address of the current thread's private copy of
/// the reduction item identified by SharedLVal within taskgroup
/// ReductionsPtr. The returned address is byte-typed (i8) at the shared
/// item's alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

/// Emits code for the 'taskwait' directive: either via the OpenMPIRBuilder
/// (only when there are no depend clauses) or via __kmpc_omp_wait_deps /
/// __kmpc_omp_taskwait runtime calls.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Untied task regions track resume points across task-scheduling points.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of a directive that needs no outlining (e.g. 'for',
/// 'sections', 'atomic') inline, inside a temporary inlined region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // critical/master/masked regions keep the outer untied-task switch behavior
  // (last ctor argument: NoInheritance).
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants expected by the __kmpc_cancel* runtime entry
/// points (cncl_kind argument).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's cancellation
/// kind constant. Any kind other than parallel/for/sections must be
/// taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits code for the 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, if it returns non-zero, branches out of the
/// construct (with a cancel barrier first for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits code for the 'cancel' directive: calls __kmpc_cancel (optionally
/// guarded by an if-clause condition) and, when cancellation was activated,
/// branches out of the construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// Pre/post action that initializes the user-declared allocators of a
/// 'uses_allocators' clause on region entry and destroys them on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator, allocator-traits) pairs; traits are non-null by construction
  // (see emitTargetOutlinedFunction, which skips trait-less allocators).
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emits the outlined function for a 'target' directive, wrapping the body
/// codegen with uses_allocators init/fini actions before delegating to
/// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE: this local 'D' shadows the directive parameter 'D' within the
      // inner loop body.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits need no runtime init/fini.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emits a call to __kmpc_init_allocator for one uses_allocators allocator
/// and stores the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emits a call to __kmpc_destroy_allocator for one uses_allocators
/// allocator, loading the handle previously stored by emitUsesAllocatorsInit.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // On the host, -fopenmp-offload-mandatory suppresses building the fallback
  // outlined function; on the device the function is always built.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  // DeviceID/FileID/Line are filled in by getTargetEntryUniqueInfo below.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a uniquely named constant global, not the function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Looks through compound statements (and containers) for the single
/// "interesting" child statement, skipping trivial expressions, ignorable
/// statements and side-effect-free declarations; returns null if there is
/// more than one such child.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable if every declaration in it is
        // ignorable (type/pragma/OpenMP metadata decls, or variables that are
        // global or never used).
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Returns the num_teams expression for a target directive (or null), and
/// sets \p DefaultVal to the statically-known team count when one can be
/// determined (-1 means "unknown / no teams region needed check").
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the single nested directive (if any) to
    // see whether it is a teams construct with a num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not valid target-based executable
  // directives; the assert above should have rejected them already.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Emits the host-side runtime value for the number of teams of a
/// target-based directive (null when no teams region is needed).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression lives inside the captured region, so emit it
      // with the inner-expression capture info in place.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No num_teams expression and no statically-known default: caller must
    // not emit a teams region.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

/// Computes the runtime number of threads for a captured region by inspecting
/// a nested parallel/simd directive: combines its 'if' and 'num_threads'
/// clauses with \p DefaultThreadLimitVal per the formula
/// <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Only an 'if' clause with no modifier or the 'parallel' modifier
        // applies here.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the enclosing thread limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
                                 DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Returns the expression that bounds the number of threads for a
/// target-based directive (thread_limit, possibly overridden by a smaller
/// constant num_threads), and sets \p DefaultVal to the statically-known
/// bound when one can be determined.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // A constant num_threads smaller than the thread limit wins.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not valid target-based executable
  // directives; the assert above should have rejected them already.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

/// Emits the host-side runtime value for the number of threads of a
/// target-based directive, combining thread_limit, num_threads and nested
/// directives as appropriate for each directive kind.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', derive the thread count from nested directives.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit =
            CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A nested 'teams' (without 'distribute') may itself wrap the directive
      // that determines the thread count: descend one level.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an 'if' clause with no modifier or the 'parallel' modifier
      // applies here.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Take the (unsigned) minimum of num_threads and thread_limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not valid target-based executable
  // directives; the assert above should have rejected them already.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case
      OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured. It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  /// Counts the trailing zero bits of the OMP_MAP_MEMBER_OF mask.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map info gathered before the struct's own entries are emitted; swapped
    // in when handling overlapped elements (see generateInfoForComponentList).
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped member of the struct: field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped member of the struct: field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the whole struct.
    Address Base = Address::invalid();
    // Lower bound (start address) of the mapped region.
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single mapping (explicit map/motion clause or
  /// implicit map): the mappable-expression component list together with the
  /// map type, its modifiers, and the associated mapper/variable reference.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Compute the size in bytes (as an llvm::Value) of the storage denoted by
  /// \a E. Handles OpenMP array shaping expressions and array sections, whose
  /// extents are not captured by the expression's static type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression:
    // size = sizeof(pointee) * product of all dimension extents.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // No length but a lower bound, i.e. array_section[lb:]:
      // size = sizeof(base) - lb * sizeof(element), clamped to zero below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Select 0 when lb is past the end so the unsigned subtraction cannot
      // wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    // 'present' can come either from a map clause modifier or from a motion
    // (to/from) clause modifier.
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Could have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
7553 void generateInfoForComponentList( 7554 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7555 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7556 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7557 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7558 bool IsFirstComponentList, bool IsImplicit, 7559 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7560 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7561 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7562 OverlappedElements = llvm::None) const { 7563 // The following summarizes what has to be generated for each map and the 7564 // types below. The generated information is expressed in this order: 7565 // base pointer, section pointer, size, flags 7566 // (to add to the ones that come from the map type and modifier). 7567 // 7568 // double d; 7569 // int i[100]; 7570 // float *p; 7571 // 7572 // struct S1 { 7573 // int i; 7574 // float f[50]; 7575 // } 7576 // struct S2 { 7577 // int i; 7578 // float f[50]; 7579 // S1 s; 7580 // double *p; 7581 // struct S2 *ps; 7582 // int &ref; 7583 // } 7584 // S2 s; 7585 // S2 *ps; 7586 // 7587 // map(d) 7588 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7589 // 7590 // map(i) 7591 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7592 // 7593 // map(i[1:23]) 7594 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7595 // 7596 // map(p) 7597 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7598 // 7599 // map(p[1:24]) 7600 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7601 // in unified shared memory mode or for local pointers 7602 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7603 // 7604 // map(s) 7605 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7606 // 7607 // map(s.i) 7608 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7609 // 7610 // map(s.s.f) 7611 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7612 // 7613 // map(s.p) 7614 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7615 // 7616 // map(to: s.p[:22]) 7617 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7618 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7619 // &(s.p), &(s.p[0]), 22*sizeof(double), 7620 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7621 // (*) alloc space for struct members, only this is a target parameter 7622 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7623 // optimizes this entry out, same in the examples below) 7624 // (***) map the pointee (map: to) 7625 // 7626 // map(to: s.ref) 7627 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7628 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7629 // (*) alloc space for struct members, only this is a target parameter 7630 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7631 // optimizes this entry out, same in the examples below) 7632 // (***) map the pointee (map: to) 7633 // 7634 // map(s.ps) 7635 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7636 // 7637 // map(from: s.ps->s.i) 7638 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7639 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7640 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7641 // 7642 // map(to: s.ps->ps) 7643 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7644 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7645 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7646 // 7647 // map(s.ps->ps->ps) 7648 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7649 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7650 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7651 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7652 // 7653 // map(to: s.ps->ps->s.f[:22]) 7654 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7655 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7656 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7657 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7658 // 7659 // map(ps) 7660 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7661 // 7662 // map(ps->i) 7663 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7664 // 7665 // map(ps->s.f) 7666 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7667 // 7668 // map(from: ps->p) 7669 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7670 // 7671 // map(to: ps->p[:22]) 7672 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7673 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7674 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7675 // 7676 // map(ps->ps) 7677 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7678 // 7679 // map(from: ps->ps->s.i) 7680 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7681 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7682 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7683 // 7684 // map(from: ps->ps->ps) 7685 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7686 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7687 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7688 // 7689 // map(ps->ps->ps->ps) 7690 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7691 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7692 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7693 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7694 // 7695 // map(to: ps->ps->ps->s.f[:22]) 7696 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7697 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7698 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7699 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7700 // 7701 // map(to: s.f[:22]) map(from: s.p[:33]) 7702 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7703 // sizeof(double*) (**), TARGET_PARAM 7704 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7705 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7706 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7707 // (*) allocate contiguous space needed to fit all mapped members even if 7708 // we allocate space for members not mapped (in this example, 7709 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7710 // them as well because they fall between &s.f[0] and &s.p) 7711 // 7712 // map(from: s.f[:22]) map(to: ps->p[:33]) 7713 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7714 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7715 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7716 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7717 // (*) the struct this entry pertains to is the 2nd element in the list of 7718 // arguments, hence MEMBER_OF(2) 7719 // 7720 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7721 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7722 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7723 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7724 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7725 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7726 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7727 // (*) the struct this entry pertains to is the 4th element in the list 7728 // of arguments, hence MEMBER_OF(4) 7729 7730 // Track if the map information being generated is the first for a capture. 7731 bool IsCaptureFirstInfo = IsFirstComponentList; 7732 // When the variable is on a declare target link or in a to clause with 7733 // unified memory, a reference is needed to hold the host/device address 7734 // of the variable. 7735 bool RequiresReference = false; 7736 7737 // Scan the components from the base to the complete expression. 
7738 auto CI = Components.rbegin(); 7739 auto CE = Components.rend(); 7740 auto I = CI; 7741 7742 // Track if the map information being generated is the first for a list of 7743 // components. 7744 bool IsExpressionFirstInfo = true; 7745 bool FirstPointerInComplexData = false; 7746 Address BP = Address::invalid(); 7747 const Expr *AssocExpr = I->getAssociatedExpression(); 7748 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7749 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7750 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7751 7752 if (isa<MemberExpr>(AssocExpr)) { 7753 // The base is the 'this' pointer. The content of the pointer is going 7754 // to be the base of the field being mapped. 7755 BP = CGF.LoadCXXThisAddress(); 7756 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7757 (OASE && 7758 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7759 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7760 } else if (OAShE && 7761 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7762 BP = Address( 7763 CGF.EmitScalarExpr(OAShE->getBase()), 7764 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7765 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7766 } else { 7767 // The base is the reference to the variable. 7768 // BP = &Var. 
7769 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7770 if (const auto *VD = 7771 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7772 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7773 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7774 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7775 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7776 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7777 RequiresReference = true; 7778 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7779 } 7780 } 7781 } 7782 7783 // If the variable is a pointer and is being dereferenced (i.e. is not 7784 // the last component), the base has to be the pointer itself, not its 7785 // reference. References are ignored for mapping purposes. 7786 QualType Ty = 7787 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7788 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7789 // No need to generate individual map information for the pointer, it 7790 // can be associated with the combined storage if shared memory mode is 7791 // active or the base declaration is not global variable. 7792 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7793 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7794 !VD || VD->hasLocalStorage()) 7795 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7796 else 7797 FirstPointerInComplexData = true; 7798 ++I; 7799 } 7800 } 7801 7802 // Track whether a component of the list should be marked as MEMBER_OF some 7803 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7804 // in a component list should be marked as MEMBER_OF, all subsequent entries 7805 // do not belong to the base struct. E.g. 7806 // struct S2 s; 7807 // s.ps->ps->ps->f[:] 7808 // (1) (2) (3) (4) 7809 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7810 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. 
ps(3) 7811 // is the pointee of ps(2) which is not member of struct s, so it should not 7812 // be marked as such (it is still PTR_AND_OBJ). 7813 // The variable is initialized to false so that PTR_AND_OBJ entries which 7814 // are not struct members are not considered (e.g. array of pointers to 7815 // data). 7816 bool ShouldBeMemberOf = false; 7817 7818 // Variable keeping track of whether or not we have encountered a component 7819 // in the component list which is a member expression. Useful when we have a 7820 // pointer or a final array section, in which case it is the previous 7821 // component in the list which tells us whether we have a member expression. 7822 // E.g. X.f[:] 7823 // While processing the final array section "[:]" it is "f" which tells us 7824 // whether we are dealing with a member of a declared struct. 7825 const MemberExpr *EncounteredME = nullptr; 7826 7827 // Track for the total number of dimension. Start from one for the dummy 7828 // dimension. 7829 uint64_t DimSize = 1; 7830 7831 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7832 bool IsPrevMemberReference = false; 7833 7834 for (; I != CE; ++I) { 7835 // If the current component is member of a struct (parent struct) mark it. 7836 if (!EncounteredME) { 7837 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7838 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7839 // as MEMBER_OF the parent struct. 7840 if (EncounteredME) { 7841 ShouldBeMemberOf = true; 7842 // Do not emit as complex pointer if this is actually not array-like 7843 // expression. 
7844 if (FirstPointerInComplexData) { 7845 QualType Ty = std::prev(I) 7846 ->getAssociatedDeclaration() 7847 ->getType() 7848 .getNonReferenceType(); 7849 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7850 FirstPointerInComplexData = false; 7851 } 7852 } 7853 } 7854 7855 auto Next = std::next(I); 7856 7857 // We need to generate the addresses and sizes if this is the last 7858 // component, if the component is a pointer or if it is an array section 7859 // whose length can't be proved to be one. If this is a pointer, it 7860 // becomes the base address for the following components. 7861 7862 // A final array section, is one whose length can't be proved to be one. 7863 // If the map item is non-contiguous then we don't treat any array section 7864 // as final array section. 7865 bool IsFinalArraySection = 7866 !IsNonContiguous && 7867 isFinalArraySectionExpression(I->getAssociatedExpression()); 7868 7869 // If we have a declaration for the mapping use that, otherwise use 7870 // the base declaration of the map clause. 7871 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7872 ? I->getAssociatedDeclaration() 7873 : BaseDecl; 7874 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7875 : MapExpr; 7876 7877 // Get information on whether the element is a pointer. Have to do a 7878 // special treatment for array sections given that they are built-in 7879 // types. 
7880 const auto *OASE = 7881 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7882 const auto *OAShE = 7883 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7884 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7885 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7886 bool IsPointer = 7887 OAShE || 7888 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7889 .getCanonicalType() 7890 ->isAnyPointerType()) || 7891 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7892 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7893 MapDecl && 7894 MapDecl->getType()->isLValueReferenceType(); 7895 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7896 7897 if (OASE) 7898 ++DimSize; 7899 7900 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7901 IsFinalArraySection) { 7902 // If this is not the last component, we expect the pointer to be 7903 // associated with an array expression or member expression. 7904 assert((Next == CE || 7905 isa<MemberExpr>(Next->getAssociatedExpression()) || 7906 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7907 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7908 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7909 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7910 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7911 "Unexpected expression"); 7912 7913 Address LB = Address::invalid(); 7914 Address LowestElem = Address::invalid(); 7915 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7916 const MemberExpr *E) { 7917 const Expr *BaseExpr = E->getBase(); 7918 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7919 // scalar. 
7920 LValue BaseLV; 7921 if (E->isArrow()) { 7922 LValueBaseInfo BaseInfo; 7923 TBAAAccessInfo TBAAInfo; 7924 Address Addr = 7925 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7926 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7927 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7928 } else { 7929 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7930 } 7931 return BaseLV; 7932 }; 7933 if (OAShE) { 7934 LowestElem = LB = 7935 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7936 CGF.ConvertTypeForMem( 7937 OAShE->getBase()->getType()->getPointeeType()), 7938 CGF.getContext().getTypeAlignInChars( 7939 OAShE->getBase()->getType())); 7940 } else if (IsMemberReference) { 7941 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7942 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7943 LowestElem = CGF.EmitLValueForFieldInitialization( 7944 BaseLVal, cast<FieldDecl>(MapDecl)) 7945 .getAddress(CGF); 7946 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7947 .getAddress(CGF); 7948 } else { 7949 LowestElem = LB = 7950 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7951 .getAddress(CGF); 7952 } 7953 7954 // If this component is a pointer inside the base struct then we don't 7955 // need to create any entry for it - it will be combined with the object 7956 // it is pointing to into a single PTR_AND_OBJ entry. 7957 bool IsMemberPointerOrAddr = 7958 EncounteredME && 7959 (((IsPointer || ForDeviceAddr) && 7960 I->getAssociatedExpression() == EncounteredME) || 7961 (IsPrevMemberReference && !IsPointer) || 7962 (IsMemberReference && Next != CE && 7963 !Next->getAssociatedExpression()->getType()->isPointerType())); 7964 if (!OverlappedElements.empty() && Next == CE) { 7965 // Handle base element with the info for overlapped elements. 
7966 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7967 assert(!IsPointer && 7968 "Unexpected base element with the pointer type."); 7969 // Mark the whole struct as the struct that requires allocation on the 7970 // device. 7971 PartialStruct.LowestElem = {0, LowestElem}; 7972 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7973 I->getAssociatedExpression()->getType()); 7974 Address HB = CGF.Builder.CreateConstGEP( 7975 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7976 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7977 TypeSize.getQuantity() - 1); 7978 PartialStruct.HighestElem = { 7979 std::numeric_limits<decltype( 7980 PartialStruct.HighestElem.first)>::max(), 7981 HB}; 7982 PartialStruct.Base = BP; 7983 PartialStruct.LB = LB; 7984 assert( 7985 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7986 "Overlapped elements must be used only once for the variable."); 7987 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7988 // Emit data for non-overlapped data. 7989 OpenMPOffloadMappingFlags Flags = 7990 OMP_MAP_MEMBER_OF | 7991 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7992 /*AddPtrFlag=*/false, 7993 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7994 llvm::Value *Size = nullptr; 7995 // Do bitcopy of all non-overlapped structure elements. 
7996 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7997 Component : OverlappedElements) { 7998 Address ComponentLB = Address::invalid(); 7999 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8000 Component) { 8001 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8002 const auto *FD = dyn_cast<FieldDecl>(VD); 8003 if (FD && FD->getType()->isLValueReferenceType()) { 8004 const auto *ME = 8005 cast<MemberExpr>(MC.getAssociatedExpression()); 8006 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8007 ComponentLB = 8008 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8009 .getAddress(CGF); 8010 } else { 8011 ComponentLB = 8012 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8013 .getAddress(CGF); 8014 } 8015 Size = CGF.Builder.CreatePtrDiff( 8016 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8017 CGF.EmitCastToVoidPtr(LB.getPointer())); 8018 break; 8019 } 8020 } 8021 assert(Size && "Failed to determine structure size"); 8022 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8023 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8024 CombinedInfo.Pointers.push_back(LB.getPointer()); 8025 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8026 Size, CGF.Int64Ty, /*isSigned=*/true)); 8027 CombinedInfo.Types.push_back(Flags); 8028 CombinedInfo.Mappers.push_back(nullptr); 8029 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8030 : 1); 8031 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8032 } 8033 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8034 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8035 CombinedInfo.Pointers.push_back(LB.getPointer()); 8036 Size = CGF.Builder.CreatePtrDiff( 8037 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8038 CGF.EmitCastToVoidPtr(LB.getPointer())); 8039 CombinedInfo.Sizes.push_back( 8040 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8041 CombinedInfo.Types.push_back(Flags); 8042 CombinedInfo.Mappers.push_back(nullptr); 8043 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8044 : 1); 8045 break; 8046 } 8047 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8048 if (!IsMemberPointerOrAddr || 8049 (Next == CE && MapType != OMPC_MAP_unknown)) { 8050 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8051 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8052 CombinedInfo.Pointers.push_back(LB.getPointer()); 8053 CombinedInfo.Sizes.push_back( 8054 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8055 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8056 : 1); 8057 8058 // If Mapper is valid, the last component inherits the mapper. 8059 bool HasMapper = Mapper && Next == CE; 8060 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8061 8062 // We need to add a pointer flag for each map that comes from the 8063 // same expression except for the first one. We also need to signal 8064 // this map is the first one that relates with the current capture 8065 // (there is a set of entries for each capture). 
8066 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8067 MapType, MapModifiers, MotionModifiers, IsImplicit, 8068 !IsExpressionFirstInfo || RequiresReference || 8069 FirstPointerInComplexData || IsMemberReference, 8070 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8071 8072 if (!IsExpressionFirstInfo || IsMemberReference) { 8073 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8074 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8075 if (IsPointer || (IsMemberReference && Next != CE)) 8076 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8077 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8078 8079 if (ShouldBeMemberOf) { 8080 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8081 // should be later updated with the correct value of MEMBER_OF. 8082 Flags |= OMP_MAP_MEMBER_OF; 8083 // From now on, all subsequent PTR_AND_OBJ entries should not be 8084 // marked as MEMBER_OF. 8085 ShouldBeMemberOf = false; 8086 } 8087 } 8088 8089 CombinedInfo.Types.push_back(Flags); 8090 } 8091 8092 // If we have encountered a member expression so far, keep track of the 8093 // mapped member. If the parent is "*this", then the value declaration 8094 // is nullptr. 
8095 if (EncounteredME) { 8096 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8097 unsigned FieldIndex = FD->getFieldIndex(); 8098 8099 // Update info about the lowest and highest elements for this struct 8100 if (!PartialStruct.Base.isValid()) { 8101 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8102 if (IsFinalArraySection) { 8103 Address HB = 8104 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8105 .getAddress(CGF); 8106 PartialStruct.HighestElem = {FieldIndex, HB}; 8107 } else { 8108 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8109 } 8110 PartialStruct.Base = BP; 8111 PartialStruct.LB = BP; 8112 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8113 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8114 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8115 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8116 } 8117 } 8118 8119 // Need to emit combined struct for array sections. 8120 if (IsFinalArraySection || IsNonContiguous) 8121 PartialStruct.IsArraySection = true; 8122 8123 // If we have a final array section, we are done with this expression. 8124 if (IsFinalArraySection) 8125 break; 8126 8127 // The pointer becomes the base for the next element. 8128 if (Next != CE) 8129 BP = IsMemberReference ? LowestElem : LB; 8130 8131 IsExpressionFirstInfo = false; 8132 IsCaptureFirstInfo = false; 8133 FirstPointerInComplexData = false; 8134 IsPrevMemberReference = IsMemberReference; 8135 } else if (FirstPointerInComplexData) { 8136 QualType Ty = Components.rbegin() 8137 ->getAssociatedDeclaration() 8138 ->getType() 8139 .getNonReferenceType(); 8140 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8141 FirstPointerInComplexData = false; 8142 } 8143 } 8144 // If ran into the whole component - allocate the space for the whole 8145 // record. 
8146 if (!EncounteredME) 8147 PartialStruct.HasCompleteRecord = true; 8148 8149 if (!IsNonContiguous) 8150 return; 8151 8152 const ASTContext &Context = CGF.getContext(); 8153 8154 // For supporting stride in array section, we need to initialize the first 8155 // dimension size as 1, first offset as 0, and first count as 1 8156 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8157 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8158 MapValuesArrayTy CurStrides; 8159 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8160 uint64_t ElementTypeSize; 8161 8162 // Collect Size information for each dimension and get the element size as 8163 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8164 // should be [10, 10] and the first stride is 4 btyes. 8165 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8166 Components) { 8167 const Expr *AssocExpr = Component.getAssociatedExpression(); 8168 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8169 8170 if (!OASE) 8171 continue; 8172 8173 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8174 auto *CAT = Context.getAsConstantArrayType(Ty); 8175 auto *VAT = Context.getAsVariableArrayType(Ty); 8176 8177 // We need all the dimension size except for the last dimension. 8178 assert((VAT || CAT || &Component == &*Components.begin()) && 8179 "Should be either ConstantArray or VariableArray if not the " 8180 "first Component"); 8181 8182 // Get element size if CurStrides is empty. 
8183 if (CurStrides.empty()) { 8184 const Type *ElementType = nullptr; 8185 if (CAT) 8186 ElementType = CAT->getElementType().getTypePtr(); 8187 else if (VAT) 8188 ElementType = VAT->getElementType().getTypePtr(); 8189 else 8190 assert(&Component == &*Components.begin() && 8191 "Only expect pointer (non CAT or VAT) when this is the " 8192 "first Component"); 8193 // If ElementType is null, then it means the base is a pointer 8194 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8195 // for next iteration. 8196 if (ElementType) { 8197 // For the case that having pointer as base, we need to remove one 8198 // level of indirection. 8199 if (&Component != &*Components.begin()) 8200 ElementType = ElementType->getPointeeOrArrayElementType(); 8201 ElementTypeSize = 8202 Context.getTypeSizeInChars(ElementType).getQuantity(); 8203 CurStrides.push_back( 8204 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8205 } 8206 } 8207 // Get dimension value except for the last dimension since we don't need 8208 // it. 8209 if (DimSizes.size() < Components.size() - 1) { 8210 if (CAT) 8211 DimSizes.push_back(llvm::ConstantInt::get( 8212 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8213 else if (VAT) 8214 DimSizes.push_back(CGF.Builder.CreateIntCast( 8215 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8216 /*IsSigned=*/false)); 8217 } 8218 } 8219 8220 // Skip the dummy dimension since we have already have its information. 8221 auto *DI = DimSizes.begin() + 1; 8222 // Product of dimension. 8223 llvm::Value *DimProd = 8224 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8225 8226 // Collect info for non-contiguous. Notice that offset, count, and stride 8227 // are only meaningful for array-section, so we insert a null for anything 8228 // other than array-section. 8229 // Also, the size of offset, count, and stride are not the same as 8230 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8231 // count, and stride are the same as the number of non-contiguous 8232 // declaration in target update to/from clause. 8233 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8234 Components) { 8235 const Expr *AssocExpr = Component.getAssociatedExpression(); 8236 8237 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8238 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8239 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8240 /*isSigned=*/false); 8241 CurOffsets.push_back(Offset); 8242 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8243 CurStrides.push_back(CurStrides.back()); 8244 continue; 8245 } 8246 8247 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8248 8249 if (!OASE) 8250 continue; 8251 8252 // Offset 8253 const Expr *OffsetExpr = OASE->getLowerBound(); 8254 llvm::Value *Offset = nullptr; 8255 if (!OffsetExpr) { 8256 // If offset is absent, then we just set it to zero. 8257 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8258 } else { 8259 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8260 CGF.Int64Ty, 8261 /*isSigned=*/false); 8262 } 8263 CurOffsets.push_back(Offset); 8264 8265 // Count 8266 const Expr *CountExpr = OASE->getLength(); 8267 llvm::Value *Count = nullptr; 8268 if (!CountExpr) { 8269 // In Clang, once a high dimension is an array section, we construct all 8270 // the lower dimension as array section, however, for case like 8271 // arr[0:2][2], Clang construct the inner dimension as an array section 8272 // but it actually is not in an array section form according to spec. 8273 if (!OASE->getColonLocFirst().isValid() && 8274 !OASE->getColonLocSecond().isValid()) { 8275 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8276 } else { 8277 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8278 // When the length is absent it defaults to ⌈(size − 8279 // lower-bound)/stride⌉, where size is the size of the array 8280 // dimension. 8281 const Expr *StrideExpr = OASE->getStride(); 8282 llvm::Value *Stride = 8283 StrideExpr 8284 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8285 CGF.Int64Ty, /*isSigned=*/false) 8286 : nullptr; 8287 if (Stride) 8288 Count = CGF.Builder.CreateUDiv( 8289 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8290 else 8291 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8292 } 8293 } else { 8294 Count = CGF.EmitScalarExpr(CountExpr); 8295 } 8296 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8297 CurCounts.push_back(Count); 8298 8299 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8300 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8301 // Offset Count Stride 8302 // D0 0 1 4 (int) <- dummy dimension 8303 // D1 0 2 8 (2 * (1) * 4) 8304 // D2 1 2 20 (1 * (1 * 5) * 4) 8305 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8306 const Expr *StrideExpr = OASE->getStride(); 8307 llvm::Value *Stride = 8308 StrideExpr 8309 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8310 CGF.Int64Ty, /*isSigned=*/false) 8311 : nullptr; 8312 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8313 if (Stride) 8314 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8315 else 8316 CurStrides.push_back(DimProd); 8317 if (DI != DimSizes.end()) 8318 ++DI; 8319 } 8320 8321 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8322 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8323 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8324 } 8325 8326 /// Return the adjusted map modifiers if the declaration a capture refers to 8327 /// appears in a first-private clause. This is expected to be used only with 8328 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    // Default for captures with no firstprivate/lambda information: copy both
    // ways.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag for a member entry at the given (0-based)
  /// \p Position: the field stores Position + 1, shifted into the MEMBER_OF
  /// bit-field of the mapping flags.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the 1-based position left by getFlagMemberOffset() bits so it
    // lands in the MEMBER_OF field of the flags word.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Overwrite the MEMBER_OF placeholder in \p Flags with \p MemberOfFlag.
  /// PTR_AND_OBJ entries that were never given the 0xFFFF placeholder are
  /// intentionally left untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Collect, in LLVM field order, the non-empty bases (non-virtual, then
  /// virtual) and the non-bitfield, non-zero-size fields of \p RD into
  /// \p Layout, recursing into bases. \p AsBase selects the base-subobject
  /// LLVM type rather than the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Slot table indexed by LLVM field number; each slot holds either a base
    // class (to recurse into) or a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A virtual base may share its slot with a non-virtual base already
      // recorded above; keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Flatten: recurse into base classes, append fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Per declaration we keep one bucket of component lists per MapKind so
    // that 'present', 'alloc' and other maps are processed in a fixed order.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect component lists from 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The var-ref expression is only meaningful when the clause has a
        // valid map location; otherwise pass null.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect component lists from 'to' clauses (treated as map-to).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect component lists from 'from' clauses (treated as map-from).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as array subscript, array
            // section or array shaping. The base address is passed as a
            // pointer to base in this case and cannot be used as a base for
            // use_device_ptr list item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected map information, one declaration at a time, so that
    // entries from the same declaration stay contiguous.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the allocator-traits variable if present; otherwise fall
        // back to the allocator expression's variable.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information: remember which map(to:) list items are
    // lambdas, so their map type can be reused later.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // Nothing to combine when there is a single non-member, non-array-section
    // entry: it already describes the whole object.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds collapse to the record base; the
    // size below then becomes one whole element starting at LB.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    // Byte difference computed over i8 so the subtraction is not scaled.
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // just pushed is the one passed to the runtime instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
8908 void generateAllInfo( 8909 MapCombinedInfoTy &CombinedInfo, 8910 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8911 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8912 assert(CurDir.is<const OMPExecutableDirective *>() && 8913 "Expect a executable directive"); 8914 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8915 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8916 } 8917 8918 /// Generate all the base pointers, section pointers, sizes, map types, and 8919 /// mappers for the extracted map clauses of user-defined mapper (all included 8920 /// in \a CombinedInfo). 8921 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8922 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8923 "Expect a declare mapper directive"); 8924 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8925 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 8926 } 8927 8928 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closure objects are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Captured `this`: emit a PTR_AND_OBJ entry for the `this` field of the
    // closure and remember field-addr -> closure-addr for the later
    // MEMBER_OF fixup in adjustMemberOfForLambdaCaptures.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally shadows the parameter; it is the captured
      // variable, not the closure object.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need map entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-ref capture: map the referenced object with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the pointee with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Entries are recognized by the exact flag combination pushed in
      // generateInfoForLambdaCaptures above.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the closure object:
      // that entry is the parent this capture is a MEMBER_OF.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      // is_device_ptr fast path: a single TARGET_PARAM entry passing the
      // pointer (or, for a by-value capture, the value) directly.
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One MapData per component list that refers to VD:
    // (components, map type, map modifiers, is-implicit, mapper, var-ref expr).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries with the `present` modifier and `alloc`
    // map type come first.
    // NOTE(review): the comparator reads the `present` modifier from LHS but
    // the `alloc` map type from RHS (and vice versa for the *R variants) —
    // confirm the intended operand pairing.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Maps the "base" MapData to all component lists that overlap with it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every later list; walk both component lists from
      // the back (outermost component first) until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list (fully consumed) is the base; the other is
          // recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type so the
      // field layout can be used as the sort order.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same parent record compares
            // by field index, different parents by which field appears first
            // in the flattened Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists that appeared in OverlappedData were already emitted above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared unconditionally: once any list has been processed (here or in
      // the overlapped loop above) subsequent lists are not "first".
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured `this`: map the pointee object tofrom.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A matching firstprivate clause overrides the implicit flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load through the reference to obtain the
        // pointer value itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit, for each non-contiguous dimension list, an array of descriptor_dim
/// structs and store its address into the corresponding slot of the offload
/// pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored in reverse order relative to the recorded
      // offset/count/stride vectors.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for dimensions that actually emitted a descriptor.
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9404 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9405 if (!E) 9406 return nullptr; 9407 9408 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9409 if (const MemberExpr *ME = 9410 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9411 return ME->getMemberDecl(); 9412 return nullptr; 9413 } 9414 9415 /// Emit a string constant containing the names of the values mapped to the 9416 /// offloading runtime library. 9417 llvm::Constant * 9418 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9419 MappableExprsHandler::MappingExprInfo &MapExprs) { 9420 9421 uint32_t SrcLocStrSize; 9422 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9423 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9424 9425 SourceLocation Loc; 9426 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9427 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9428 Loc = VD->getLocation(); 9429 else 9430 Loc = MapExprs.getMapExpr()->getExprLoc(); 9431 } else { 9432 Loc = MapExprs.getMapDecl()->getLocation(); 9433 } 9434 9435 std::string ExprName; 9436 if (MapExprs.getMapExpr()) { 9437 PrintingPolicy P(CGF.getContext().getLangOpts()); 9438 llvm::raw_string_ostream OS(ExprName); 9439 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9440 OS.flush(); 9441 } else { 9442 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9443 } 9444 9445 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9446 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9447 PLoc.getLine(), PLoc.getColumn(), 9448 SrcLocStrSize); 9449 } 9450 9451 /// Emit the arrays used to pass the captures and map information to the 9452 /// offloading runtime library. If there is no map or capture information, 9453 /// return nullptr by reference. 
/// Emit the offloading descriptor arrays for a target data environment:
/// per-capture base pointers, pointers, sizes, map-type flags, (optionally)
/// map names, and user-defined mapper function pointers. The emitted
/// values/globals are recorded in \p Info for the subsequent runtime call.
/// \param IsNonContiguous If true, also emit the non-contiguous descriptor
/// and store dimension counts instead of byte sizes for NON_CONTIG entries.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers and mappers always live in stack temporaries;
    // they are filled element-by-element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time-known sizes (0 placeholder for the
    // runtime-evaluated ones); RuntimeSizes marks which entries must be
    // stored at run time instead.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // ConstantExpr/GlobalValue sizes are constants in the IR sense but
        // not plain integers, so they are treated as runtime sizes.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the "size" slot carries the number of
          // dimensions rather than a byte count.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is runtime-evaluated: a plain stack array, filled below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is a compile-time constant: emit a private constant
      // global holding the known values (zeros in the runtime slots).
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a stack buffer, then the
        // loop below overwrites the runtime-evaluated slots in place.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes constant: the global itself can be passed directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each capture's base pointer, pointer, (runtime) size and mapper
    // into the corresponding slot of the arrays created above.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where the device pointer for use_device_ptr/addr captures
      // was stored so later codegen can load it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only runtime-evaluated sizes need a store; constant slots were
      // already materialized in the sizes global above.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each [N x T] array in Info to a pointer to its first element
    // (GEP 0, 0), which is the form the runtime entry points expect.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end-of-region call use the dedicated end map-types array when
    // one was emitted (i.e. when PRESENT modifiers had to be stripped).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // The map-names array exists only when debug information is requested;
    // otherwise pass a null placeholder.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No captures: every argument is a typed null pointer.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
9713 static const OMPExecutableDirective * 9714 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9715 const auto *CS = D.getInnermostCapturedStmt(); 9716 const auto *Body = 9717 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9718 const Stmt *ChildStmt = 9719 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9720 9721 if (const auto *NestedDir = 9722 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9723 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9724 switch (D.getDirectiveKind()) { 9725 case OMPD_target: 9726 if (isOpenMPDistributeDirective(DKind)) 9727 return NestedDir; 9728 if (DKind == OMPD_teams) { 9729 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9730 /*IgnoreCaptured=*/true); 9731 if (!Body) 9732 return nullptr; 9733 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9734 if (const auto *NND = 9735 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9736 DKind = NND->getDirectiveKind(); 9737 if (isOpenMPDistributeDirective(DKind)) 9738 return NND; 9739 } 9740 } 9741 return nullptr; 9742 case OMPD_target_teams: 9743 if (isOpenMPDistributeDirective(DKind)) 9744 return NestedDir; 9745 return nullptr; 9746 case OMPD_target_parallel: 9747 case OMPD_target_simd: 9748 case OMPD_target_parallel_for: 9749 case OMPD_target_parallel_for_simd: 9750 return nullptr; 9751 case OMPD_target_teams_distribute: 9752 case OMPD_target_teams_distribute_simd: 9753 case OMPD_target_teams_distribute_parallel_for: 9754 case OMPD_target_teams_distribute_parallel_for_simd: 9755 case OMPD_parallel: 9756 case OMPD_for: 9757 case OMPD_parallel_for: 9758 case OMPD_parallel_master: 9759 case OMPD_parallel_sections: 9760 case OMPD_for_simd: 9761 case OMPD_parallel_for_simd: 9762 case OMPD_cancel: 9763 case OMPD_cancellation_point: 9764 case OMPD_ordered: 9765 case OMPD_threadprivate: 9766 case OMPD_allocate: 9767 case OMPD_task: 9768 case OMPD_simd: 9769 case OMPD_tile: 9770 
case OMPD_unroll: 9771 case OMPD_sections: 9772 case OMPD_section: 9773 case OMPD_single: 9774 case OMPD_master: 9775 case OMPD_critical: 9776 case OMPD_taskyield: 9777 case OMPD_barrier: 9778 case OMPD_taskwait: 9779 case OMPD_taskgroup: 9780 case OMPD_atomic: 9781 case OMPD_flush: 9782 case OMPD_depobj: 9783 case OMPD_scan: 9784 case OMPD_teams: 9785 case OMPD_target_data: 9786 case OMPD_target_exit_data: 9787 case OMPD_target_enter_data: 9788 case OMPD_distribute: 9789 case OMPD_distribute_simd: 9790 case OMPD_distribute_parallel_for: 9791 case OMPD_distribute_parallel_for_simd: 9792 case OMPD_teams_distribute: 9793 case OMPD_teams_distribute_simd: 9794 case OMPD_teams_distribute_parallel_for: 9795 case OMPD_teams_distribute_parallel_for_simd: 9796 case OMPD_target_update: 9797 case OMPD_declare_simd: 9798 case OMPD_declare_variant: 9799 case OMPD_begin_declare_variant: 9800 case OMPD_end_declare_variant: 9801 case OMPD_declare_target: 9802 case OMPD_end_declare_target: 9803 case OMPD_declare_reduction: 9804 case OMPD_declare_mapper: 9805 case OMPD_taskloop: 9806 case OMPD_taskloop_simd: 9807 case OMPD_master_taskloop: 9808 case OMPD_master_taskloop_simd: 9809 case OMPD_parallel_master_taskloop: 9810 case OMPD_parallel_master_taskloop_simd: 9811 case OMPD_requires: 9812 case OMPD_metadirective: 9813 case OMPD_unknown: 9814 default: 9815 llvm_unreachable("Unexpected directive."); 9816 } 9817 } 9818 9819 return nullptr; 9820 } 9821 9822 /// Emit the user-defined mapper function. The code generation follows the 9823 /// pattern in the example below. 9824 /// \code 9825 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9826 /// void *base, void *begin, 9827 /// int64_t size, int64_t type, 9828 /// void *name = nullptr) { 9829 /// // Allocate space for an array section first or add a base/begin for 9830 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // The mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop OptimizeNone so this synthesized helper remains eligible for
  // optimization.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // CreateExactUDiv encodes the invariant that the byte size is an exact
  // multiple of the element size.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF field position of the map-type flags.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map types; the tofrom case flows in from
    // ToElseBB with MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: shadows the single-element OffloadingArgs declared above.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Record the mapper against the emitting function so it can be referenced
  // later.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  // Final guard, matching the \code comment on emitUserDefinedMapper:
  //   init: (size > 1 || (base != begin && PTR_AND_OBJ)) && !DELETE
  //   del:  size > 1 && DELETE
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function emitted for \p D, generating and caching it in
/// UDMMap on first use.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // emitUserDefinedMapper populates UDMMap as a side effect.
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call informing the offloading runtime of the trip count of the
/// loop associated with a nested teams-distribute construct, so the runtime
/// can size the launch. \p SizeEmitter computes the number of iterations.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // Only loop-based directives with an enclosing teams/distribute region have
  // a compile-time-computable trip count worth pushing to the runtime.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the offloading sequence for a target region: gather the captured
/// variables, fill the mapping arrays, invoke the matching __tgt_target*
/// runtime entry point and, if the offload fails (or offloading is not
/// possible), run the host fallback version of the region.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // With -fopenmp-offload-mandatory there is no host version to fall back to.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the target call in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      // Mandatory offloading that failed has no host path: trap.
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Re-capture inside the task region; the earlier captures belong to
        // the enclosing function.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are
    // required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These
      // two values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that
      // should be passed to the runtime library - a 32-bit integer with the
      // value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return from __tgt_target* means the offload did not run.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, captured-record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this
      // capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for target directives that must be emitted as
/// device entry points, dispatching each one to the matching
/// CodeGenFunction::EmitOMPTarget*DeviceFunction.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target entry points; reaching
    // them here is a front-end invariant violation.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if \p VD carries a device_type clause that excludes it from
/// the current compilation side: device_type(nohost) on the host, or
/// device_type(host) on the device.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

/// Decide whether normal emission of \p GD should be suppressed; returns
/// true to skip the declaration. On the device this also scans the function
/// body for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Decide whether emission of a global variable should be suppressed;
/// returns true to skip it (deferring it when it may be needed later).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables (and 'to' under unified shared memory) are emitted
    // lazily via emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Record \p VD in the offload entries table so the device/host runtimes can
/// associate the two copies of the variable.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug
      // info may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if
(CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatch target emission handling of \p GD to the function or variable
/// path; returns true if normal emission should be skipped.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Emit the declare-target globals whose emission was deferred by
/// emitTargetGlobalVariable().
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// Base-class hook: nothing to adjust here; NVPTX-style runtimes override
/// this. Only the directive-kind invariant is checked.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effects of an 'omp requires' directive: unified shared memory
/// and the default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

/// Returns the atomic ordering established by 'requires
/// atomic_default_mem_order' (seq_cst-equivalent by default).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// If \p VD has an 'omp allocate' attribute with a predefined allocator,
/// report the address space to emit it in via \p AS and return true.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// RAII: temporarily turn off automatic marking of globals as declare target
/// while compiling for the device; restores the previous state on scope exit.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Returns true if \p GD may be treated as a regular global during device
/// codegen (i.e. must not be auto-marked); also records newly seen decls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the constructor-like function that registers the 'requires'
/// clauses of this translation unit with the offload runtime, or return
/// nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() && !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit the __kmpc_fork_teams call that launches the outlined teams region
/// with its captured variables.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit the __kmpc_push_num_teams call for num_teams/thread_limit clauses.
/// A missing clause expression is encoded as 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the paired __tgt_target_data_begin/end_mapper calls for a 'target
/// data' region, honoring the if/device clauses.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
11093 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11094 /*IsNonContiguous=*/true); 11095 11096 llvm::Value *BasePointersArrayArg = nullptr; 11097 llvm::Value *PointersArrayArg = nullptr; 11098 llvm::Value *SizesArrayArg = nullptr; 11099 llvm::Value *MapTypesArrayArg = nullptr; 11100 llvm::Value *MapNamesArrayArg = nullptr; 11101 llvm::Value *MappersArrayArg = nullptr; 11102 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11103 SizesArrayArg, MapTypesArrayArg, 11104 MapNamesArrayArg, MappersArrayArg, Info); 11105 11106 // Emit device ID if any. 11107 llvm::Value *DeviceID = nullptr; 11108 if (Device) { 11109 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11110 CGF.Int64Ty, /*isSigned=*/true); 11111 } else { 11112 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11113 } 11114 11115 // Emit the number of elements in the offloading arrays. 11116 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11117 // 11118 // Source location for the ident struct 11119 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11120 11121 llvm::Value *OffloadingArgs[] = {RTLoc, 11122 DeviceID, 11123 PointerNum, 11124 BasePointersArrayArg, 11125 PointersArrayArg, 11126 SizesArrayArg, 11127 MapTypesArrayArg, 11128 MapNamesArrayArg, 11129 MappersArrayArg}; 11130 CGF.EmitRuntimeCall( 11131 OMPBuilder.getOrCreateRuntimeFunction( 11132 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11133 OffloadingArgs); 11134 11135 // If device pointer privatization is required, emit the body of the region 11136 // here. It will have to be duplicated: with and without privatization. 11137 if (!Info.CaptureDeviceAddrMap.empty()) 11138 CodeGen(CGF); 11139 }; 11140 11141 // Generate code for the closing of the data region. 
11142 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11143 PrePostActionTy &) { 11144 assert(Info.isValid() && "Invalid data environment closing arguments."); 11145 11146 llvm::Value *BasePointersArrayArg = nullptr; 11147 llvm::Value *PointersArrayArg = nullptr; 11148 llvm::Value *SizesArrayArg = nullptr; 11149 llvm::Value *MapTypesArrayArg = nullptr; 11150 llvm::Value *MapNamesArrayArg = nullptr; 11151 llvm::Value *MappersArrayArg = nullptr; 11152 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11153 SizesArrayArg, MapTypesArrayArg, 11154 MapNamesArrayArg, MappersArrayArg, Info, 11155 {/*ForEndCall=*/true}); 11156 11157 // Emit device ID if any. 11158 llvm::Value *DeviceID = nullptr; 11159 if (Device) { 11160 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11161 CGF.Int64Ty, /*isSigned=*/true); 11162 } else { 11163 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11164 } 11165 11166 // Emit the number of elements in the offloading arrays. 11167 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11168 11169 // Source location for the ident struct 11170 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11171 11172 llvm::Value *OffloadingArgs[] = {RTLoc, 11173 DeviceID, 11174 PointerNum, 11175 BasePointersArrayArg, 11176 PointersArrayArg, 11177 SizesArrayArg, 11178 MapTypesArrayArg, 11179 MapNamesArrayArg, 11180 MappersArrayArg}; 11181 CGF.EmitRuntimeCall( 11182 OMPBuilder.getOrCreateRuntimeFunction( 11183 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11184 OffloadingArgs); 11185 }; 11186 11187 // If we need device pointer privatization, we need to emit the body of the 11188 // region with no privatization in the 'else' branch of the conditional. 11189 // Otherwise, we don't have to do anything. 
11190 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11191 PrePostActionTy &) { 11192 if (!Info.CaptureDeviceAddrMap.empty()) { 11193 CodeGen.setAction(NoPrivAction); 11194 CodeGen(CGF); 11195 } 11196 }; 11197 11198 // We don't have to do anything to close the region if the if clause evaluates 11199 // to false. 11200 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11201 11202 if (IfCond) { 11203 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11204 } else { 11205 RegionCodeGenTy RCG(BeginThenGen); 11206 RCG(CGF); 11207 } 11208 11209 // If we don't require privatization of device pointers, we emit the body in 11210 // between the runtime calls. This avoids duplicating the body code. 11211 if (Info.CaptureDeviceAddrMap.empty()) { 11212 CodeGen.setAction(NoPrivAction); 11213 CodeGen(CGF); 11214 } 11215 11216 if (IfCond) { 11217 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11218 } else { 11219 RegionCodeGenTy RCG(EndThenGen); 11220 RCG(CGF); 11221 } 11222 } 11223 11224 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11225 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11226 const Expr *Device) { 11227 if (!CGF.HaveInsertPoint()) 11228 return; 11229 11230 assert((isa<OMPTargetEnterDataDirective>(D) || 11231 isa<OMPTargetExitDataDirective>(D) || 11232 isa<OMPTargetUpdateDirective>(D)) && 11233 "Expecting either target enter, exit data, or update directives."); 11234 11235 CodeGenFunction::OMPTargetDataInfo InputInfo; 11236 llvm::Value *MapTypesArray = nullptr; 11237 llvm::Value *MapNamesArray = nullptr; 11238 // Generate the code for the opening of the data environment. 11239 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11240 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11241 // Emit device ID if any. 
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are not standalone target-data constructs and
    // must never reach this function (see the assert at the function entry).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays first, then either wrap the runtime call in an
  // outer task (required for 'depend'/'nowait') or emit it inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size in bits of the "characteristic data type" (CDT) of \p FD,
/// used to derive the vector length when no 'simdlen' clause is given.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  // NOTE(review): RetType is known non-null past the early return above, so
  // the !isNull() re-check here is redundant (but harmless).
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For instance methods the implicit 'this' occupies slot 0.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Add x86 '_ZGV...' vector-variant attributes to \p Fn for every
/// ISA/mask combination implied by the 'declare simd' branch state.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    // No branch-state clause: emit both unmasked and masked variants.
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen: derive VLEN from register size / CDT size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Encode each parameter's classification.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11553 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11554 QT = QT.getCanonicalType(); 11555 11556 if (QT->isVoidType()) 11557 return false; 11558 11559 if (Kind == ParamKindTy::Uniform) 11560 return false; 11561 11562 if (Kind == ParamKindTy::Linear) 11563 return false; 11564 11565 // TODO: Handle linear references with modifiers 11566 11567 if (Kind == ParamKindTy::LinearWithVarStride) 11568 return false; 11569 11570 return true; 11571 } 11572 11573 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11574 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11575 QT = QT.getCanonicalType(); 11576 unsigned Size = C.getTypeSize(QT); 11577 11578 // Only scalars and complex within 16 bytes wide set PVB to true. 11579 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11580 return false; 11581 11582 if (QT->isFloatingType()) 11583 return true; 11584 11585 if (QT->isIntegerType()) 11586 return true; 11587 11588 if (QT->isPointerType()) 11589 return true; 11590 11591 // TODO: Add support for complex types (section 3.1.2, item 2). 11592 11593 return false; 11594 } 11595 11596 /// Computes the lane size (LS) of a return type or of an input parameter, 11597 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11598 /// TODO: Add support for references, section 3.2.1, item 1. 11599 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11600 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11601 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11602 if (getAArch64PBV(PTy, C)) 11603 return C.getTypeSize(PTy); 11604 } 11605 if (getAArch64PBV(QT, C)) 11606 return C.getTypeSize(QT); 11607 11608 return C.getTypeSize(C.getUIntPtrType()); 11609 } 11610 11611 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11612 // signature of the scalar function, as defined in 3.2.2 of the 11613 // AAVFABI. 
/// Returns {NDS, WDS, OutputBecomesInput} for the scalar function \p FD:
/// the narrowest and widest lane sizes over the return value and all
/// parameters, and whether the return value is re-routed through an extra
/// vector input parameter (which adds a 'v' token to the mangled name).
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  // Collect the lane size of the return value (if non-void) and of every
  // parameter; NDS/WDS are the min/max over this list.
  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    // A return value that maps to vector but is not pass-by-value is
    // returned through an additional input parameter.
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      // 'ls<pos>': linear with the stride held in another parameter.
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    // Append an alignment token when an 'aligned' clause applied.
    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  // Shape: <prefix><isa><mask><vlen>[v]<params>_<scalar-name>
  Out << Prefix << ISA << LMask << VLEN;
  // The extra 'v' encodes the return value routed through an input.
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // For each NDS, emit the vector lengths that fit 64-bit and 128-bit
  // Advanced SIMD registers (a single name when only one length fits).
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable "x" vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// For every 'declare simd' attribute on (any redeclaration of) \p FD,
/// classify the parameters (uniform/linear/aligned) and attach the
/// target-specific vector-variant name attributes to \p Fn.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Walk the whole redeclaration chain, newest first; each redeclaration may
  // carry its own 'declare simd' attributes.
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For methods, slot 0 is the implicit 'this' (keyed by the FunctionDecl).
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info. NI iterates the per-clause alignment expressions
      // in lockstep with the aligned() parameter list.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // No explicit alignment expression: use the default SIMD alignment
        // for the parameter's type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters. SI iterates the per-clause step expressions
      // in lockstep with the linear() parameter list.
      auto *SI = Attr->steps_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          // NOTE(review): dyn_cast on the possibly-sugared type may miss
          // typedef'd pointer types — confirm whether the canonical type
          // should be inspected here instead.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: it must name another parameter, whose
            // position is encoded instead of a literal stride.
            // NOTE(review): cast<> asserts on mismatch and never yields null,
            // so this condition is always true; dyn_cast may have been
            // intended — verify.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support: calls __kmpc_doacross_fini with the
/// two arguments captured at push time when the scope is exited (normally or
/// via EH).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // Arguments captured by value at push time; the cleanup may run after the
  // originating scope's locals are gone.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means this path is unreachable; emitting a call
    // would produce invalid IR.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit initialization of doacross (ordered-with-depend) loop support:
/// builds an on-stack array of kmp_dim descriptors (one per collapsed loop
/// dimension in \p NumIterations), calls __kmpc_doacross_init, and pushes a
/// cleanup that calls __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    // Built once and cached in KmpDimTy for the whole module.
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-init the whole array so lo stays 0 for every dimension.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration-count expression may have any integer type; widen/convert
    // to kmp_int64 before the store.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup so __kmpc_doacross_fini runs on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit one doacross synchronization point for an 'ordered depend(...)'
/// construct: materializes the loop-counter vector into a temporary kmp_int64
/// array and calls __kmpc_doacross_post (for depend(source)) or
/// __kmpc_doacross_wait (for depend(sink)).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each counter to kmp_int64 before storing into the vector.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at \p Loc, using the nounwind fast path when the
/// callee is a known-nothrow function.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      // Nothrow callee: skip invoke/landingpad machinery.
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to an outlined (parallel/task/teams) function body.
/// Thin wrapper over emitCall; device runtimes override this.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record (for later diagnostics/bookkeeping) that a declare-target function
/// body is being emitted.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Host implementation: native and target parameters share the same local
/// slot, so just return the native parameter's address.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Given the allocate directive list item type and align clause value,
/// return appropriate alignment.
/// Compute the alignment (as an i<size_t> constant) for an allocate-directive
/// list item: max of the user-specified align clause value and the type's
/// natural alignment. Returns nullptr when no align clause is present.
static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
                                      const Expr *Alignment) {
  if (!Alignment)
    return nullptr;

  // The align clause operand must be an integer constant expression.
  unsigned UserAlign =
      Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
  CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);

  // OpenMP5.1 pg 185 lines 7-10
  //   Each item in the align modifier list must be aligned to the maximum
  //   of the specified alignment and the type's natural alignment.
  //
  // If no alignment specified then use the natural alignment.
  return llvm::ConstantInt::get(
      CGF.CGM.SizeTy,
      std::max<unsigned>(UserAlign, NaturalAlign.getQuantity()));
}

/// Return the address of a local variable, taking into account both untied
/// task locals (which live in task-private storage) and variables with an
/// OMPAllocateDeclAttr (which are allocated via __kmpc_alloc /
/// __kmpc_aligned_alloc and freed via __kmpc_free on scope exit).
/// Returns Address::invalid() if \p VD needs no special handling.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task with registered locals, pick up
  // the (pointer-slot, real-storage) pair recorded for this variable.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is a runtime value; round it up to the declared alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(
        CGF, VD->getType().getNonReferenceType(), AA->getAlignment());
    // Argument order differs between the two entry points:
    //   __kmpc_aligned_alloc(gtid, align, size, allocator)
    //   __kmpc_alloc(gtid, size, allocator)
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw void* result to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      // Untied task: stash the allocated pointer in the task-local slot.
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as a raw encoding so it can be captured by
      // value and re-materialized when the cleanup fires.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // For untied tasks the caller-visible address is the real task-private
    // storage; otherwise it is the freshly allocated block.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Return true if \p VD is registered as a local of the untied task currently
/// being emitted in \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII: pushes the set of nontemporal-clause variables of \p S onto the
/// runtime's NontemporalDeclsStack for the duration of the directive's
/// codegen; pops in the destructor.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Only other form allowed here is a member of the current class
        // (implicit or explicit 'this').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
/// RAII: registers the given (variable -> (slot, real-address)) map as the
/// untied-task locals of the function currently being emitted; pops the map
/// in the destructor.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Remember which stack slot belongs to this function so lookups by CurFn
  // (see getAddressOfLocalVariable) find the right map.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Return true if \p VD is marked nontemporal by any directive currently on
/// the NontemporalDeclsStack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

/// Collect into \p NeedToAddForLPCsAsDisabled the declarations that must have
/// lastprivate-conditional analysis disabled inside directive \p S: captured
/// vars of target/task regions plus vars privatized by
/// private/firstprivate/lastprivate/reduction/linear clauses, restricted to
/// those that are tracked by some enclosing lastprivate-conditional region.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      // Only scalar DeclRefExpr list items participate in
      // lastprivate-conditional tracking.
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // A candidate only needs disabling if the innermost enclosing region that
  // tracks it is itself enabled.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// RAII (push form): if \p S has any lastprivate(conditional:) clause (OpenMP
/// >= 5.0), push a tracking record mapping each list item to a unique global
/// name, along with the loop IV lvalue used to order updates.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// RAII (disable form): pushes a "Disabled" record listing decls for which
/// inner lastprivate-conditional analysis must be suppressed inside \p S.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      // Empty unique name: entry exists only to mark the decl as disabled.
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of record the constructor pushed (if any).
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Allocate (or reuse) the per-function "struct { value; char Fired; }"
/// wrapper for a lastprivate(conditional:) variable \p VD, reset its Fired
/// flag to 0, and return the address of the value field.
/// NOTE(review): the record name "lasprivate.conditional" below is a
/// long-standing typo kept for IR-name stability.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField =
        addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Wrapper already built for this variable in this function; reuse it.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag: the variable has not been written yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Results of the search, retrieved via getFoundData().
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-out; a Disabled record shadows any outer tracking.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this'-based) are tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Generic traversal: only glvalue sub-expressions can name a tracked
    // variable being written.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emit the "conditional" part of lastprivate(conditional:): under a named
/// critical section, compare the global last-updated IV against the current
/// IV and, if not newer, copy the current private value and IV into the
/// global "last" variables identified by \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS names a tracked lastprivate(conditional:) variable, emit the
/// corresponding bookkeeping: either the critical-section update (same
/// function) or an atomic store to the wrapper's Fired flag (inner region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    // The variable was registered in an outer function: locate its wrapper
    // struct there and atomically set the Fired flag through the private
    // address visible here.
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// At the end of directive \p D, for each captured lastprivate(conditional:)
/// variable (except those in \p IgnoredDecls) whose Fired flag was set in an
/// inner region, emit the deferred critical-section update of the global
/// "last" value.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost enabled record; it must belong to the current function.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    // NOTE(review): "rehistered" below is a typo ("registered") in the
    // assert text; code behavior is unaffected.
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

/// Final copy-out for a lastprivate(conditional:) variable: if the global
/// "last" variable exists (i.e. some update fired), copy it into the
/// original variable \p PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

// The CGOpenMPSIMDRuntime overrides below are deliberate hard failures: in
// SIMD-only mode ('-fopenmp-simd') no runtime calls may be emitted, so any
// path reaching these entry points indicates a front-end bug.

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
llvm_unreachable("Not supported in SIMD-only mode"); 12842 } 12843 12844 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12845 const RegionCodeGenTy &MasterOpGen, 12846 SourceLocation Loc, 12847 const Expr *Filter) { 12848 llvm_unreachable("Not supported in SIMD-only mode"); 12849 } 12850 12851 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12852 SourceLocation Loc) { 12853 llvm_unreachable("Not supported in SIMD-only mode"); 12854 } 12855 12856 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12857 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12858 SourceLocation Loc) { 12859 llvm_unreachable("Not supported in SIMD-only mode"); 12860 } 12861 12862 void CGOpenMPSIMDRuntime::emitSingleRegion( 12863 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12864 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12865 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12866 ArrayRef<const Expr *> AssignmentOps) { 12867 llvm_unreachable("Not supported in SIMD-only mode"); 12868 } 12869 12870 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12871 const RegionCodeGenTy &OrderedOpGen, 12872 SourceLocation Loc, 12873 bool IsThreads) { 12874 llvm_unreachable("Not supported in SIMD-only mode"); 12875 } 12876 12877 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12878 SourceLocation Loc, 12879 OpenMPDirectiveKind Kind, 12880 bool EmitChecks, 12881 bool ForceSimpleCall) { 12882 llvm_unreachable("Not supported in SIMD-only mode"); 12883 } 12884 12885 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12886 CodeGenFunction &CGF, SourceLocation Loc, 12887 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12888 bool Ordered, const DispatchRTInput &DispatchValues) { 12889 llvm_unreachable("Not supported in SIMD-only mode"); 12890 } 12891 12892 void CGOpenMPSIMDRuntime::emitForStaticInit( 12893 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12894 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12895 llvm_unreachable("Not supported in SIMD-only mode"); 12896 } 12897 12898 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12899 CodeGenFunction &CGF, SourceLocation Loc, 12900 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12901 llvm_unreachable("Not supported in SIMD-only mode"); 12902 } 12903 12904 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12905 SourceLocation Loc, 12906 unsigned IVSize, 12907 bool IVSigned) { 12908 llvm_unreachable("Not supported in SIMD-only mode"); 12909 } 12910 12911 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12912 SourceLocation Loc, 12913 OpenMPDirectiveKind DKind) { 12914 llvm_unreachable("Not supported in SIMD-only mode"); 12915 } 12916 12917 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12918 SourceLocation Loc, 12919 unsigned IVSize, bool IVSigned, 12920 Address IL, Address LB, 12921 Address UB, Address ST) { 12922 llvm_unreachable("Not supported in SIMD-only mode"); 12923 } 12924 12925 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12926 llvm::Value *NumThreads, 12927 SourceLocation Loc) { 12928 llvm_unreachable("Not supported in SIMD-only mode"); 12929 } 12930 12931 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12932 ProcBindKind ProcBind, 12933 SourceLocation Loc) { 12934 llvm_unreachable("Not supported in SIMD-only mode"); 12935 } 12936 12937 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12938 const VarDecl *VD, 12939 Address VDAddr, 12940 SourceLocation Loc) { 12941 llvm_unreachable("Not supported in SIMD-only mode"); 12942 } 12943 12944 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12945 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12946 CodeGenFunction *CGF) { 12947 llvm_unreachable("Not supported in SIMD-only mode"); 
12948 } 12949 12950 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12951 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12952 llvm_unreachable("Not supported in SIMD-only mode"); 12953 } 12954 12955 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12956 ArrayRef<const Expr *> Vars, 12957 SourceLocation Loc, 12958 llvm::AtomicOrdering AO) { 12959 llvm_unreachable("Not supported in SIMD-only mode"); 12960 } 12961 12962 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12963 const OMPExecutableDirective &D, 12964 llvm::Function *TaskFunction, 12965 QualType SharedsTy, Address Shareds, 12966 const Expr *IfCond, 12967 const OMPTaskDataTy &Data) { 12968 llvm_unreachable("Not supported in SIMD-only mode"); 12969 } 12970 12971 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12972 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12973 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12974 const Expr *IfCond, const OMPTaskDataTy &Data) { 12975 llvm_unreachable("Not supported in SIMD-only mode"); 12976 } 12977 12978 void CGOpenMPSIMDRuntime::emitReduction( 12979 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12980 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12981 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12982 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12983 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12984 ReductionOps, Options); 12985 } 12986 12987 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12988 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12989 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12990 llvm_unreachable("Not supported in SIMD-only mode"); 12991 } 12992 12993 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12994 SourceLocation Loc, 12995 bool 
IsWorksharingReduction) { 12996 llvm_unreachable("Not supported in SIMD-only mode"); 12997 } 12998 12999 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13000 SourceLocation Loc, 13001 ReductionCodeGen &RCG, 13002 unsigned N) { 13003 llvm_unreachable("Not supported in SIMD-only mode"); 13004 } 13005 13006 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13007 SourceLocation Loc, 13008 llvm::Value *ReductionsPtr, 13009 LValue SharedLVal) { 13010 llvm_unreachable("Not supported in SIMD-only mode"); 13011 } 13012 13013 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13014 SourceLocation Loc, 13015 const OMPTaskDataTy &Data) { 13016 llvm_unreachable("Not supported in SIMD-only mode"); 13017 } 13018 13019 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13020 CodeGenFunction &CGF, SourceLocation Loc, 13021 OpenMPDirectiveKind CancelRegion) { 13022 llvm_unreachable("Not supported in SIMD-only mode"); 13023 } 13024 13025 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13026 SourceLocation Loc, const Expr *IfCond, 13027 OpenMPDirectiveKind CancelRegion) { 13028 llvm_unreachable("Not supported in SIMD-only mode"); 13029 } 13030 13031 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13032 const OMPExecutableDirective &D, StringRef ParentName, 13033 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13034 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13035 llvm_unreachable("Not supported in SIMD-only mode"); 13036 } 13037 13038 void CGOpenMPSIMDRuntime::emitTargetCall( 13039 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13040 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13041 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13042 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13043 const OMPLoopDirective &D)> 13044 SizeEmitter) { 13045 llvm_unreachable("Not supported in SIMD-only mode"); 13046 } 13047 
// SIMD-only mode stubs (continued): target/teams/doacross entry points that
// would require the OpenMP runtime library are unreachable with -fopenmp-simd.

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Nothing is offloaded in SIMD-only mode, so no global ever needs
  // target-specific emission; report "not handled" to the caller.
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}