1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 918 if (needCleanups(N)) { 919 PrivateAddr = CGF.Builder.CreateElementBitCast( 920 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 921 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 922 } 923 } 924 925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 926 LValue BaseLV) { 927 BaseTy = BaseTy.getNonReferenceType(); 928 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 929 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 930 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 931 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 932 } else { 933 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 934 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 935 } 936 BaseTy = BaseTy->getPointeeType(); 937 } 938 return CGF.MakeAddrLValue( 939 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 940 CGF.ConvertTypeForMem(ElTy)), 941 BaseLV.getType(), BaseLV.getBaseInfo(), 942 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 943 } 944 945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 946 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 947 llvm::Value *Addr) { 948 Address Tmp = Address::invalid(); 949 Address TopTmp = Address::invalid(); 950 Address MostTopTmp = Address::invalid(); 951 BaseTy = BaseTy.getNonReferenceType(); 952 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 953 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 954 Tmp = CGF.CreateMemTemp(BaseTy); 955 if (TopTmp.isValid()) 956 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 957 else 958 MostTopTmp = Tmp; 959 TopTmp = Tmp; 960 BaseTy = BaseTy->getPointeeType(); 961 } 962 llvm::Type *Ty = BaseLVType; 963 if (Tmp.isValid()) 964 Ty = Tmp.getElementType(); 965 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 966 if (Tmp.isValid()) { 967 CGF.Builder.CreateStore(Addr, Tmp); 968 return MostTopTmp; 969 } 970 return 
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
1039 CGF.EHStack.pushTerminate(); 1040 CodeGen(CGF); 1041 CGF.EHStack.popTerminate(); 1042 } 1043 1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1045 CodeGenFunction &CGF) { 1046 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1047 getThreadIDVariable()->getType(), 1048 AlignmentSource::Decl); 1049 } 1050 1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1052 QualType FieldTy) { 1053 auto *Field = FieldDecl::Create( 1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1057 Field->setAccess(AS_public); 1058 DC->addDecl(Field); 1059 return Field; 1060 } 1061 1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1063 StringRef Separator) 1064 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1065 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1066 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1067 1068 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1069 OMPBuilder.initialize(); 1070 loadOffloadInfoMetadata(); 1071 } 1072 1073 void CGOpenMPRuntime::clear() { 1074 InternalVars.clear(); 1075 // Clean non-target variable declarations possibly used only in debug info. 
1076 for (const auto &Data : EmittedNonTargetVariables) { 1077 if (!Data.getValue().pointsToAliveValue()) 1078 continue; 1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1080 if (!GV) 1081 continue; 1082 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1083 continue; 1084 GV->eraseFromParent(); 1085 } 1086 } 1087 1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1089 SmallString<128> Buffer; 1090 llvm::raw_svector_ostream OS(Buffer); 1091 StringRef Sep = FirstSeparator; 1092 for (StringRef Part : Parts) { 1093 OS << Sep << Part; 1094 Sep = Separator; 1095 } 1096 return std::string(OS.str()); 1097 } 1098 1099 static llvm::Function * 1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1101 const Expr *CombinerInitializer, const VarDecl *In, 1102 const VarDecl *Out, bool IsCombiner) { 1103 // void .omp_combiner.(Ty *in, Ty *out); 1104 ASTContext &C = CGM.getContext(); 1105 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1106 FunctionArgList Args; 1107 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1110 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1111 Args.push_back(&OmpOutParm); 1112 Args.push_back(&OmpInParm); 1113 const CGFunctionInfo &FnInfo = 1114 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1115 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1116 std::string Name = CGM.getOpenMPRuntime().getName( 1117 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1118 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1119 Name, &CGM.getModule()); 1120 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1121 if (CGM.getLangOpts().Optimize) { 1122 Fn->removeFnAttr(llvm::Attribute::NoInline); 1123 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1124 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1125 } 1126 CodeGenFunction CGF(CGM); 1127 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1128 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1129 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1130 Out->getLocation()); 1131 CodeGenFunction::OMPPrivateScope Scope(CGF); 1132 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1133 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1134 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1135 .getAddress(CGF); 1136 }); 1137 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1138 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1139 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1140 .getAddress(CGF); 1141 }); 1142 (void)Scope.Privatize(); 1143 if (!IsCombiner && Out->hasInit() && 1144 !CGF.isTrivialInitializer(Out->getInit())) { 1145 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1146 Out->getType().getQualifiers(), 1147 /*IsInitializer=*/true); 1148 } 1149 if (CombinerInitializer) 1150 CGF.EmitIgnoredExpr(CombinerInitializer); 1151 Scope.ForceCleanup(); 1152 CGF.FinishFunction(); 1153 return Fn; 1154 } 1155 1156 void CGOpenMPRuntime::emitUserDefinedReduction( 1157 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1158 if (UDRMap.count(D) > 0) 1159 return; 1160 llvm::Function *Combiner = emitCombinerOrInitializer( 1161 CGM, D->getType(), D->getCombiner(), 1162 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1163 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1164 /*IsCombiner=*/true); 1165 llvm::Function *Initializer = nullptr; 1166 if (const Expr *Init = D->getInitializer()) { 1167 Initializer = emitCombinerOrInitializer( 1168 CGM, D->getType(), 1169 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1170 : nullptr, 1171 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1173 /*IsCombiner=*/false); 1174 } 1175 UDRMap.try_emplace(D, Combiner, Initializer); 1176 if (CGF) { 1177 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1178 Decls.second.push_back(D); 1179 } 1180 } 1181 1182 std::pair<llvm::Function *, llvm::Function *> 1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1184 auto I = UDRMap.find(D); 1185 if (I != UDRMap.end()) 1186 return I->second; 1187 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1188 return UDRMap.lookup(D); 1189 } 1190 1191 namespace { 1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1193 // Builder if one is present. 1194 struct PushAndPopStackRAII { 1195 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1196 bool HasCancel) 1197 : OMPBuilder(OMPBuilder) { 1198 if (!OMPBuilder) 1199 return; 1200 1201 // The following callback is the crucial part of clangs cleanup process. 1202 // 1203 // NOTE: 1204 // Once the OpenMPIRBuilder is used to create parallel regions (and 1205 // similar), the cancellation destination (Dest below) is determined via 1206 // IP. That means if we have variables to finalize we split the block at IP, 1207 // use the new block (=BB) as destination to build a JumpDest (via 1208 // getJumpDestInCurrentScope(BB)) which then is fed to 1209 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1210 // to push & pop an FinalizationInfo object. 
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel(); 1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1256 HasCancel = OPFD->hasCancel(); 1257 else if (const auto *OPFD = 1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else if (const auto *OPFD = 1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1265 // parallel region to make cancellation barriers work properly. 1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1269 HasCancel, OutlinedHelperName); 1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1272 } 1273 1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1275 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1276 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1278 return emitParallelOrTeamsOutlinedFunction( 1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1280 } 1281 1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1283 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1285 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1286 return emitParallelOrTeamsOutlinedFunction( 1287 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1293 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1294 bool Tied, unsigned &NumberOfParts) { 1295 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1296 PrePostActionTy &) { 1297 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1298 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1299 llvm::Value *TaskArgs[] = { 1300 UpLoc, ThreadID, 1301 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1302 TaskTVar->getType()->castAs<PointerType>()) 1303 .getPointer(CGF)}; 1304 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1305 CGM.getModule(), OMPRTL___kmpc_omp_task), 1306 TaskArgs); 1307 }; 1308 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1309 UntiedCodeGen); 1310 CodeGen.setAction(Action); 1311 assert(!ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 for tasks"); 1313 const OpenMPDirectiveKind Region = 1314 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1315 : OMPD_task; 1316 const CapturedStmt *CS = D.getCapturedStmt(Region); 1317 bool HasCancel = false; 1318 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1319 HasCancel = TD->hasCancel(); 1320 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1321 HasCancel = TD->hasCancel(); 1322 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1323 HasCancel = TD->hasCancel(); 1324 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 1327 CodeGenFunction CGF(CGM, true); 1328 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1329 InnermostKind, HasCancel, Action); 1330 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1331 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1332 if (!Tied) 1333 NumberOfParts = Action.getNumberOfParts(); 1334 return Res; 1335 } 1336 1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1338 
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1436 Line, Column); 1437 } 1438 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1439 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1440 Reserved2Flags); 1441 } 1442 1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1444 SourceLocation Loc) { 1445 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1446 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1447 // the clang invariants used below might be broken. 1448 if (CGM.getLangOpts().OpenMPIRBuilder) { 1449 SmallString<128> Buffer; 1450 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1451 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1452 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1453 return OMPBuilder.getOrCreateThreadID( 1454 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1455 } 1456 1457 llvm::Value *ThreadID = nullptr; 1458 // Check whether we've already cached a load of the thread id in this 1459 // function. 1460 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1461 if (I != OpenMPLocThreadIDMap.end()) { 1462 ThreadID = I->second.ThreadID; 1463 if (ThreadID != nullptr) 1464 return ThreadID; 1465 } 1466 // If exceptions are enabled, do not use parameter to avoid possible crash. 1467 if (auto *OMPRegionInfo = 1468 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1469 if (OMPRegionInfo->getThreadIDVariable()) { 1470 // Check if this an outlined function with thread id passed as argument. 
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 } 1530 1531 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1532 return OMPBuilder.IdentPtr; 1533 } 1534 1535 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1536 if (!Kmpc_MicroTy) { 1537 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1538 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1539 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1540 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1541 } 1542 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1543 } 1544 1545 llvm::FunctionCallee 1546 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1547 assert((IVSize == 32 || IVSize == 64) && 1548 "IV size is not compatible with the omp runtime"); 1549 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1550 : "__kmpc_for_static_init_4u") 1551 : (IVSigned ? "__kmpc_for_static_init_8" 1552 : "__kmpc_for_static_init_8u"); 1553 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1554 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1555 llvm::Type *TypeParams[] = { 1556 getIdentTyPointerTy(), // loc 1557 CGM.Int32Ty, // tid 1558 CGM.Int32Ty, // schedtype 1559 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1560 PtrTy, // p_lower 1561 PtrTy, // p_upper 1562 PtrTy, // p_stride 1563 ITy, // incr 1564 ITy // chunk 1565 }; 1566 auto *FnTy = 1567 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1568 return CGM.CreateRuntimeFunction(FnTy, Name); 1569 } 1570 1571 llvm::FunctionCallee 1572 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1573 assert((IVSize == 32 || IVSize == 64) && 1574 "IV size is not compatible with the omp runtime"); 1575 StringRef Name = 1576 IVSize == 32 1577 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1578 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1579 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1580 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1581 CGM.Int32Ty, // tid 1582 CGM.Int32Ty, // schedtype 1583 ITy, // lower 1584 ITy, // upper 1585 ITy, // stride 1586 ITy // chunk 1587 }; 1588 auto *FnTy = 1589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1590 return CGM.CreateRuntimeFunction(FnTy, Name); 1591 } 1592 1593 llvm::FunctionCallee 1594 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1595 assert((IVSize == 32 || IVSize == 64) && 1596 "IV size is not compatible with the omp runtime"); 1597 StringRef Name = 1598 IVSize == 32 1599 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1600 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1601 llvm::Type *TypeParams[] = { 1602 getIdentTyPointerTy(), // loc 1603 CGM.Int32Ty, // tid 1604 }; 1605 auto *FnTy = 1606 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1607 return CGM.CreateRuntimeFunction(FnTy, Name); 1608 } 1609 1610 llvm::FunctionCallee 1611 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1612 assert((IVSize == 32 || IVSize == 64) && 1613 "IV size is not compatible with the omp runtime"); 1614 StringRef Name = 1615 IVSize == 32 1616 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1617 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1618 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1619 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1620 llvm::Type *TypeParams[] = { 1621 getIdentTyPointerTy(), // loc 1622 CGM.Int32Ty, // tid 1623 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1624 PtrTy, // p_lower 1625 PtrTy, // p_upper 1626 PtrTy // p_stride 1627 }; 1628 auto *FnTy = 1629 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1630 return CGM.CreateRuntimeFunction(FnTy, Name); 1631 } 1632 1633 /// Obtain information that uniquely identifies a target entry. This 1634 /// consists of the file and device IDs as well as line number associated with 1635 /// the relevant entry source location. 1636 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1637 unsigned &DeviceID, unsigned &FileID, 1638 unsigned &LineNum) { 1639 SourceManager &SM = C.getSourceManager(); 1640 1641 // The loc should be always valid and have a file ID (the user cannot use 1642 // #pragma directives in macros) 1643 1644 assert(Loc.isValid() && "Source location is expected to be always valid."); 1645 1646 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1647 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1648 1649 llvm::sys::fs::UniqueID ID; 1650 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1651 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1652 << PLoc.getFilename() << EC.message(); 1653 1654 DeviceID = ID.getDevice(); 1655 FileID = ID.getFile(); 1656 LineNum = PLoc.getLine(); 1657 } 1658 1659 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1660 if (CGM.getLangOpts().OpenMPSimd) 1661 return Address::invalid(); 1662 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1663 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1664 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1665 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1666 HasRequiresUnifiedSharedMemory))) { 1667 SmallString<64> PtrName; 1668 { 1669 
llvm::raw_svector_ostream OS(PtrName);
      // The pointer is named <mangled name>[_<file id>]_decl_tgt_ref_ptr.
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Not externally visible: append the file ID of the canonical
        // declaration (presumably to keep the name unique across files).
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // No global with that name in the module yet: create a weak pointer
      // variable of type "pointer to VD's type".
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host compilation initializes the pointer with the address
      // of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal variable used as the runtime cache for the
/// threadprivate variable \p VD, creating it on first use. Must only be
/// called when TLS is not used for threadprivate variables.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1703 std::string Suffix = getName({"cache", ""}); 1704 return getOrCreateInternalVariable( 1705 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1706 } 1707 1708 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1709 const VarDecl *VD, 1710 Address VDAddr, 1711 SourceLocation Loc) { 1712 if (CGM.getLangOpts().OpenMPUseTLS && 1713 CGM.getContext().getTargetInfo().isTLSSupported()) 1714 return VDAddr; 1715 1716 llvm::Type *VarTy = VDAddr.getElementType(); 1717 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1718 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1719 CGM.Int8PtrTy), 1720 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1721 getOrCreateThreadPrivateCache(VD)}; 1722 return Address(CGF.EmitRuntimeCall( 1723 OMPBuilder.getOrCreateRuntimeFunction( 1724 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1725 Args), 1726 VDAddr.getAlignment()); 1727 } 1728 1729 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1730 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1731 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1732 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1733 // library. 1734 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1736 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1737 OMPLoc); 1738 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1739 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the helper functions needed to initialize and destroy the
/// per-thread copies of the threadprivate variable \p VD and register them
/// with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is used, when the
/// variable has no definition, when its definition was already handled
/// (tracked in ThreadPrivateWithDefinition), or when neither a constructor
/// nor a destructor helper is required.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///                    re-emits the initializer is generated.
/// \param CGF         If non-null, the registration call is emitted into this
///                    function and nullptr is returned; otherwise a new
///                    "__omp_threadprivate_init_" function is created and
///                    returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // insert(...).second is true only the first time this mangled name is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: destination of the initialization.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Treat the incoming pointer as an object of the variable's type,
      // using the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the memory passed in.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Named scoped object: the empty debug location stays in effect until
      // the end of this block.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of Ctor/Dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a new nullary
      // init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit and register the constructor/destructor offload entries for the
/// declare target variable \p VD stored at \p Addr.
///
/// Returns false when no offload target triples are set and this is not a
/// device compilation; in every other case returns whether this is a device
/// compilation (LangOpts.OpenMPIsDevice).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non-declare-target variables, for 'link' variables,
  // and for 'to' variables under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition only once (keyed by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Buffer = "__omp_offloading_" "_<device>" "_<file>_" <name> "_l" <line>
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable VD (the global passed in as Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL
= ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Initialize the variable in place at Addr.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the helper to the module's used-globals list.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: no function is generated; a private constant i8 global
      // with the same "_ctor" name serves as the entry and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD (the global passed in as Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destroy the variable in place at Addr.
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global with the same "_dtor" name
      // serves as the entry and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType identified by \p Name.
///
/// With OpenMP TLS enabled and supported, the backing global is marked
/// thread-local and returned directly. Otherwise the address is obtained
/// through a call to __kmpc_threadprivate_cached using a per-variable cache.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Arguments: loc, gtid, &var, size of the variable, &cache.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable( 2004 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2005 return Address( 2006 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2007 CGF.EmitRuntimeCall( 2008 OMPBuilder.getOrCreateRuntimeFunction( 2009 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2010 Args), 2011 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2012 CGM.getContext().getTypeAlignInChars(VarType)); 2013 } 2014 2015 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2016 const RegionCodeGenTy &ThenGen, 2017 const RegionCodeGenTy &ElseGen) { 2018 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2019 2020 // If the condition constant folds and can be elided, try to avoid emitting 2021 // the condition and the dead arm of the if/else. 2022 bool CondConstant; 2023 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2024 if (CondConstant) 2025 ThenGen(CGF); 2026 else 2027 ElseGen(CGF); 2028 return; 2029 } 2030 2031 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2032 // emit the conditional branch. 2033 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2034 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2035 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2036 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2037 2038 // Emit the 'then' code. 2039 CGF.EmitBlock(ThenBlock); 2040 ThenGen(CGF); 2041 CGF.EmitBranch(ContBlock); 2042 // Emit the 'else' code if present. 2043 // There is no need to emit line number for unconditional branch. 2044 (void)ApplyDebugLocation::CreateEmpty(CGF); 2045 CGF.EmitBlock(ElseBlock); 2046 ElseGen(CGF); 2047 // There is no need to emit line number for unconditional branch. 2048 (void)ApplyDebugLocation::CreateEmpty(CGF); 2049 CGF.EmitBranch(ContBlock); 2050 // Emit the continuation block for code after the if. 
2051 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2052 } 2053 2054 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2055 llvm::Function *OutlinedFn, 2056 ArrayRef<llvm::Value *> CapturedVars, 2057 const Expr *IfCond) { 2058 if (!CGF.HaveInsertPoint()) 2059 return; 2060 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2061 auto &M = CGM.getModule(); 2062 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2063 this](CodeGenFunction &CGF, PrePostActionTy &) { 2064 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2065 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2066 llvm::Value *Args[] = { 2067 RTLoc, 2068 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2069 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2070 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2071 RealArgs.append(std::begin(Args), std::end(Args)); 2072 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2073 2074 llvm::FunctionCallee RTLFn = 2075 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2076 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2077 }; 2078 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2079 this](CodeGenFunction &CGF, PrePostActionTy &) { 2080 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2081 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2082 // Build calls: 2083 // __kmpc_serialized_parallel(&Loc, GTid); 2084 llvm::Value *Args[] = {RTLoc, ThreadID}; 2085 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2086 M, OMPRTL___kmpc_serialized_parallel), 2087 Args); 2088 2089 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2090 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2091 Address ZeroAddrBound = 2092 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2093 /*Name=*/".bound.zero.addr"); 2094 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2095 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2096 // ThreadId 
for serialized parallels is 0. 2097 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2098 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2099 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2100 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2101 2102 // __kmpc_end_serialized_parallel(&Loc, GTid); 2103 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2104 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2105 M, OMPRTL___kmpc_end_serialized_parallel), 2106 EndArgs); 2107 }; 2108 if (IfCond) { 2109 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2110 } else { 2111 RegionCodeGenTy ThenRCG(ThenGen); 2112 ThenRCG(CGF); 2113 } 2114 } 2115 2116 // If we're inside an (outlined) parallel region, use the region info's 2117 // thread-ID variable (it is passed in a first argument of the outlined function 2118 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2119 // regular serial code region, get thread ID by calling kmp_int32 2120 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2121 // return the address of that temp. 
2122 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2123 SourceLocation Loc) { 2124 if (auto *OMPRegionInfo = 2125 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2126 if (OMPRegionInfo->getThreadIDVariable()) 2127 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2128 2129 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2130 QualType Int32Ty = 2131 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2132 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2133 CGF.EmitStoreOfScalar(ThreadID, 2134 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2135 2136 return ThreadIDTemp; 2137 } 2138 2139 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2140 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2141 SmallString<256> Buffer; 2142 llvm::raw_svector_ostream Out(Buffer); 2143 Out << Name; 2144 StringRef RuntimeName = Out.str(); 2145 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2146 if (Elem.second) { 2147 assert(Elem.second->getType()->getPointerElementType() == Ty && 2148 "OMP internal variable has different type than requested"); 2149 return &*Elem.second; 2150 } 2151 2152 return Elem.second = new llvm::GlobalVariable( 2153 CGM.getModule(), Ty, /*IsConstant*/ false, 2154 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2155 Elem.first(), /*InsertBefore=*/nullptr, 2156 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2157 } 2158 2159 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2160 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2161 std::string Name = getName({Prefix, "var"}); 2162 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2163 } 2164 2165 namespace { 2166 /// Common pre(post)-action for different OpenMP constructs. 
2167 class CommonActionTy final : public PrePostActionTy { 2168 llvm::FunctionCallee EnterCallee; 2169 ArrayRef<llvm::Value *> EnterArgs; 2170 llvm::FunctionCallee ExitCallee; 2171 ArrayRef<llvm::Value *> ExitArgs; 2172 bool Conditional; 2173 llvm::BasicBlock *ContBlock = nullptr; 2174 2175 public: 2176 CommonActionTy(llvm::FunctionCallee EnterCallee, 2177 ArrayRef<llvm::Value *> EnterArgs, 2178 llvm::FunctionCallee ExitCallee, 2179 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2180 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2181 ExitArgs(ExitArgs), Conditional(Conditional) {} 2182 void Enter(CodeGenFunction &CGF) override { 2183 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2184 if (Conditional) { 2185 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2186 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2187 ContBlock = CGF.createBasicBlock("omp_if.end"); 2188 // Generate the branch (If-stmt) 2189 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2190 CGF.EmitBlock(ThenBlock); 2191 } 2192 } 2193 void Done(CodeGenFunction &CGF) { 2194 // Emit the rest of blocks/branches 2195 CGF.EmitBranch(ContBlock); 2196 CGF.EmitBlock(ContBlock, true); 2197 } 2198 void Exit(CodeGenFunction &CGF) override { 2199 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2200 } 2201 }; 2202 } // anonymous namespace 2203 2204 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2205 StringRef CriticalName, 2206 const RegionCodeGenTy &CriticalOpGen, 2207 SourceLocation Loc, const Expr *Hint) { 2208 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2209 // CriticalOpGen(); 2210 // __kmpc_end_critical(ident_t *, gtid, Lock); 2211 // Prepare arguments and build a call to __kmpc_critical 2212 if (!CGF.HaveInsertPoint()) 2213 return; 2214 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2215 getCriticalRegionLock(CriticalName)}; 2216 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2217 std::end(Args)); 2218 if (Hint) { 2219 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2220 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2221 } 2222 CommonActionTy Action( 2223 OMPBuilder.getOrCreateRuntimeFunction( 2224 CGM.getModule(), 2225 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2226 EnterArgs, 2227 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2228 OMPRTL___kmpc_end_critical), 2229 Args); 2230 CriticalOpGen.setAction(Action); 2231 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2232 } 2233 2234 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2235 const RegionCodeGenTy &MasterOpGen, 2236 SourceLocation Loc) { 2237 if (!CGF.HaveInsertPoint()) 2238 return; 2239 // if(__kmpc_master(ident_t *, gtid)) { 2240 // MasterOpGen(); 2241 // __kmpc_end_master(ident_t *, gtid); 2242 // } 2243 // Prepare arguments and build a call to __kmpc_master 2244 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2245 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2246 CGM.getModule(), OMPRTL___kmpc_master), 2247 Args, 2248 OMPBuilder.getOrCreateRuntimeFunction( 2249 CGM.getModule(), OMPRTL___kmpc_end_master), 2250 Args, 2251 /*Conditional=*/true); 2252 MasterOpGen.setAction(Action); 2253 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2254 Action.Done(CGF); 2255 } 2256 2257 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2258 SourceLocation Loc) { 2259 if (!CGF.HaveInsertPoint()) 2260 return; 2261 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2262 OMPBuilder.CreateTaskyield(CGF.Builder); 2263 } else { 2264 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2265 llvm::Value *Args[] = { 2266 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2267 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2268 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2269 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2270 Args); 2271 } 2272 2273 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2274 Region->emitUntiedSwitch(CGF); 2275 } 2276 2277 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2278 const RegionCodeGenTy &TaskgroupOpGen, 2279 SourceLocation Loc) { 2280 if (!CGF.HaveInsertPoint()) 2281 return; 2282 // __kmpc_taskgroup(ident_t *, gtid); 2283 // TaskgroupOpGen(); 2284 // __kmpc_end_taskgroup(ident_t *, gtid); 2285 // Prepare arguments and build a call to __kmpc_taskgroup 2286 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2287 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2288 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2289 Args, 2290 OMPBuilder.getOrCreateRuntimeFunction( 2291 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2292 Args); 2293 TaskgroupOpGen.setAction(Action); 2294 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2295 } 2296 2297 /// Given an array of pointers to variables, project the address of a 2298 /// given variable. 2299 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2300 unsigned Index, const VarDecl *Var) { 2301 // Pull out the pointer to the variable. 
2302 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2303 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2304 2305 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2306 Addr = CGF.Builder.CreateElementBitCast( 2307 Addr, CGF.ConvertTypeForMem(Var->getType())); 2308 return Addr; 2309 } 2310 2311 static llvm::Value *emitCopyprivateCopyFunction( 2312 CodeGenModule &CGM, llvm::Type *ArgsType, 2313 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2314 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2315 SourceLocation Loc) { 2316 ASTContext &C = CGM.getContext(); 2317 // void copy_func(void *LHSArg, void *RHSArg); 2318 FunctionArgList Args; 2319 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2320 ImplicitParamDecl::Other); 2321 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2322 ImplicitParamDecl::Other); 2323 Args.push_back(&LHSArg); 2324 Args.push_back(&RHSArg); 2325 const auto &CGFI = 2326 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2327 std::string Name = 2328 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2329 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2330 llvm::GlobalValue::InternalLinkage, Name, 2331 &CGM.getModule()); 2332 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2333 Fn->setDoesNotRecurse(); 2334 CodeGenFunction CGF(CGM); 2335 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2336 // Dest = (void*[n])(LHSArg); 2337 // Src = (void*[n])(RHSArg); 2338 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2339 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2340 ArgsType), CGF.getPointerAlign()); 2341 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2342 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2343 ArgsType), CGF.getPointerAlign()); 2344 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 2345 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2346 // ... 2347 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2348 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2349 const auto *DestVar = 2350 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2351 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2352 2353 const auto *SrcVar = 2354 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2355 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2356 2357 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2358 QualType Type = VD->getType(); 2359 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2360 } 2361 CGF.FinishFunction(); 2362 return Fn; 2363 } 2364 2365 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2366 const RegionCodeGenTy &SingleOpGen, 2367 SourceLocation Loc, 2368 ArrayRef<const Expr *> CopyprivateVars, 2369 ArrayRef<const Expr *> SrcExprs, 2370 ArrayRef<const Expr *> DstExprs, 2371 ArrayRef<const Expr *> AssignmentOps) { 2372 if (!CGF.HaveInsertPoint()) 2373 return; 2374 assert(CopyprivateVars.size() == SrcExprs.size() && 2375 CopyprivateVars.size() == DstExprs.size() && 2376 CopyprivateVars.size() == AssignmentOps.size()); 2377 ASTContext &C = CGM.getContext(); 2378 // int32 did_it = 0; 2379 // if(__kmpc_single(ident_t *, gtid)) { 2380 // SingleOpGen(); 2381 // __kmpc_end_single(ident_t *, gtid); 2382 // did_it = 1; 2383 // } 2384 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2385 // <copy_func>, did_it); 2386 2387 Address DidIt = Address::invalid(); 2388 if (!CopyprivateVars.empty()) { 2389 // int32 did_it = 0; 2390 QualType KmpInt32Ty = 2391 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2392 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2393 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2394 } 2395 // Prepare arguments and build a call to __kmpc_single 2396 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body only runs when __kmpc_single returns
  // non-null.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the guarded block.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The copyprivate list is a void*[N] array, N = number of variables.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Slot I receives the address of the I-th variable as a void*.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
2431 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2432 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2433 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2434 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2435 Address CL = 2436 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2437 CGF.VoidPtrTy); 2438 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2439 llvm::Value *Args[] = { 2440 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2441 getThreadID(CGF, Loc), // i32 <gtid> 2442 BufSize, // size_t <buf_size> 2443 CL.getPointer(), // void *<copyprivate list> 2444 CpyFn, // void (*) (void *, void *) <copy_func> 2445 DidItVal // i32 did_it 2446 }; 2447 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2448 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2449 Args); 2450 } 2451 } 2452 2453 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2454 const RegionCodeGenTy &OrderedOpGen, 2455 SourceLocation Loc, bool IsThreads) { 2456 if (!CGF.HaveInsertPoint()) 2457 return; 2458 // __kmpc_ordered(ident_t *, gtid); 2459 // OrderedOpGen(); 2460 // __kmpc_end_ordered(ident_t *, gtid); 2461 // Prepare arguments and build a call to __kmpc_ordered 2462 if (IsThreads) { 2463 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2464 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2465 CGM.getModule(), OMPRTL___kmpc_ordered), 2466 Args, 2467 OMPBuilder.getOrCreateRuntimeFunction( 2468 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2469 Args); 2470 OrderedOpGen.setAction(Action); 2471 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2472 return; 2473 } 2474 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2475 } 2476 2477 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2478 unsigned Flags; 2479 if (Kind == OMPD_for) 2480 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2481 else if (Kind == OMPD_sections) 2482 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2483 else if (Kind == OMPD_single) 2484 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2485 else if (Kind == OMPD_barrier) 2486 Flags = OMP_IDENT_BARRIER_EXPL; 2487 else 2488 Flags = OMP_IDENT_BARRIER_IMPL; 2489 return Flags; 2490 } 2491 2492 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2493 CodeGenFunction &CGF, const OMPLoopDirective &S, 2494 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2495 // Check if the loop directive is actually a doacross loop directive. In this 2496 // case choose static, 1 schedule. 2497 if (llvm::any_of( 2498 S.getClausesOfKind<OMPOrderedClause>(), 2499 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2500 ScheduleKind = OMPC_SCHEDULE_static; 2501 // Chunk size is 1 in this case. 2502 llvm::APInt ChunkSize(32, 1); 2503 ChunkExpr = IntegerLiteral::Create( 2504 CGF.getContext(), ChunkSize, 2505 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2506 SourceLocation()); 2507 } 2508 } 2509 2510 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2511 OpenMPDirectiveKind Kind, bool EmitChecks, 2512 bool ForceSimpleCall) { 2513 // Check if we should use the OMPBuilder 2514 auto *OMPRegionInfo = 2515 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2516 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2517 CGF.Builder.restoreIP(OMPBuilder.CreateBarrier( 2518 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2519 return; 2520 } 2521 2522 if (!CGF.HaveInsertPoint()) 2523 return; 2524 // Build call __kmpc_cancel_barrier(loc, thread_id); 2525 // Build call __kmpc_barrier(loc, thread_id); 2526 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2527 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2528 // thread_id); 2529 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2530 getThreadID(CGF, Loc)}; 2531 if (OMPRegionInfo) { 2532 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2533 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2534 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2535 OMPRTL___kmpc_cancel_barrier), 2536 Args); 2537 if (EmitChecks) { 2538 // if (__kmpc_cancel_barrier()) { 2539 // exit from construct; 2540 // } 2541 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2542 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2543 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2544 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2545 CGF.EmitBlock(ExitBB); 2546 // exit from construct; 2547 CodeGenFunction::JumpDest CancelDestination = 2548 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2549 CGF.EmitBranchThroughCleanup(CancelDestination); 2550 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2551 } 2552 return; 2553 } 2554 } 2555 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2556 CGM.getModule(), OMPRTL___kmpc_barrier), 2557 Args); 2558 } 2559 2560 /// Map the OpenMP loop schedule to the runtime enumeration. 2561 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2562 bool Chunked, bool Ordered) { 2563 switch (ScheduleKind) { 2564 case OMPC_SCHEDULE_static: 2565 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2566 : (Ordered ? OMP_ord_static : OMP_sch_static); 2567 case OMPC_SCHEDULE_dynamic: 2568 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2569 case OMPC_SCHEDULE_guided: 2570 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2571 case OMPC_SCHEDULE_runtime: 2572 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2573 case OMPC_SCHEDULE_auto: 2574 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2575 case OMPC_SCHEDULE_unknown: 2576 assert(!Chunked && "chunk was specified but schedule kind not known"); 2577 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2578 } 2579 llvm_unreachable("Unexpected runtime schedule"); 2580 } 2581 2582 /// Map the OpenMP distribute schedule to the runtime enumeration. 2583 static OpenMPSchedType 2584 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2585 // only static is allowed for dist_schedule 2586 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2587 } 2588 2589 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2590 bool Chunked) const { 2591 OpenMPSchedType Schedule = 2592 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2593 return Schedule == OMP_sch_static; 2594 } 2595 2596 bool CGOpenMPRuntime::isStaticNonchunked( 2597 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2598 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2599 return Schedule == OMP_dist_sch_static; 2600 } 2601 2602 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2603 bool Chunked) const { 2604 OpenMPSchedType Schedule = 2605 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2606 return Schedule == OMP_sch_static_chunked; 2607 } 2608 2609 bool CGOpenMPRuntime::isStaticChunked( 2610 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2611 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2612 return Schedule == OMP_dist_sch_static_chunked; 2613 } 2614 2615 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2616 OpenMPSchedType Schedule = 2617 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2618 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2619 return Schedule != OMP_sch_static; 2620 } 2621 2622 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2623 OpenMPScheduleClauseModifier M1, 2624 OpenMPScheduleClauseModifier M2) { 2625 int Modifier = 0; 2626 switch (M1) { 2627 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2628 Modifier = OMP_sch_modifier_monotonic; 2629 break; 2630 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2631 Modifier = OMP_sch_modifier_nonmonotonic; 2632 break; 2633 case OMPC_SCHEDULE_MODIFIER_simd: 2634 if (Schedule == OMP_sch_static_chunked) 2635 Schedule = OMP_sch_static_balanced_chunked; 2636 break; 2637 case OMPC_SCHEDULE_MODIFIER_last: 2638 case OMPC_SCHEDULE_MODIFIER_unknown: 2639 break; 2640 } 2641 switch (M2) { 2642 case OMPC_SCHEDULE_MODIFIER_monotonic: 2643 Modifier = OMP_sch_modifier_monotonic; 2644 break; 2645 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2646 Modifier = OMP_sch_modifier_nonmonotonic; 2647 break; 2648 case OMPC_SCHEDULE_MODIFIER_simd: 2649 if (Schedule == OMP_sch_static_chunked) 2650 Schedule = OMP_sch_static_balanced_chunked; 2651 break; 2652 case OMPC_SCHEDULE_MODIFIER_last: 2653 case OMPC_SCHEDULE_MODIFIER_unknown: 2654 break; 2655 } 2656 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2657 // If the static schedule kind is specified or if the ordered clause is 2658 // specified, and if the nonmonotonic modifier is not specified, the effect is 2659 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2660 // modifier is specified, the effect is as if the nonmonotonic modifier is 2661 // specified. 
2662 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2663 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2664 Schedule == OMP_sch_static_balanced_chunked || 2665 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2666 Schedule == OMP_dist_sch_static_chunked || 2667 Schedule == OMP_dist_sch_static)) 2668 Modifier = OMP_sch_modifier_nonmonotonic; 2669 } 2670 return Schedule | Modifier; 2671 } 2672 2673 void CGOpenMPRuntime::emitForDispatchInit( 2674 CodeGenFunction &CGF, SourceLocation Loc, 2675 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2676 bool Ordered, const DispatchRTInput &DispatchValues) { 2677 if (!CGF.HaveInsertPoint()) 2678 return; 2679 OpenMPSchedType Schedule = getRuntimeSchedule( 2680 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2681 assert(Ordered || 2682 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2683 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2684 Schedule != OMP_sch_static_balanced_chunked)); 2685 // Call __kmpc_dispatch_init( 2686 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2687 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2688 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2689 2690 // If the Chunk was not specified in the clause - use default value 1. 2691 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2692 : CGF.Builder.getIntN(IVSize, 1); 2693 llvm::Value *Args[] = { 2694 emitUpdateLocation(CGF, Loc), 2695 getThreadID(CGF, Loc), 2696 CGF.Builder.getInt32(addMonoNonMonoModifier( 2697 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2698 DispatchValues.LB, // Lower 2699 DispatchValues.UB, // Upper 2700 CGF.Builder.getIntN(IVSize, 1), // Stride 2701 Chunk // Chunk 2702 }; 2703 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2704 } 2705 2706 static void emitForStaticInitCall( 2707 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2708 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2709 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2710 const CGOpenMPRuntime::StaticRTInput &Values) { 2711 if (!CGF.HaveInsertPoint()) 2712 return; 2713 2714 assert(!Values.Ordered); 2715 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2716 Schedule == OMP_sch_static_balanced_chunked || 2717 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2718 Schedule == OMP_dist_sch_static || 2719 Schedule == OMP_dist_sch_static_chunked); 2720 2721 // Call __kmpc_for_static_init( 2722 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2723 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2724 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2725 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2726 llvm::Value *Chunk = Values.Chunk; 2727 if (Chunk == nullptr) { 2728 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2729 Schedule == OMP_dist_sch_static) && 2730 "expected static non-chunked schedule"); 2731 // If the Chunk was not specified in the clause - use default value 1. 
2732 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2733 } else { 2734 assert((Schedule == OMP_sch_static_chunked || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static_chunked || 2737 Schedule == OMP_dist_sch_static_chunked) && 2738 "expected static chunked schedule"); 2739 } 2740 llvm::Value *Args[] = { 2741 UpdateLocation, 2742 ThreadId, 2743 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2744 M2)), // Schedule type 2745 Values.IL.getPointer(), // &isLastIter 2746 Values.LB.getPointer(), // &LB 2747 Values.UB.getPointer(), // &UB 2748 Values.ST.getPointer(), // &Stride 2749 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2750 Chunk // Chunk 2751 }; 2752 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2753 } 2754 2755 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2756 SourceLocation Loc, 2757 OpenMPDirectiveKind DKind, 2758 const OpenMPScheduleTy &ScheduleKind, 2759 const StaticRTInput &Values) { 2760 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2761 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2762 assert(isOpenMPWorksharingDirective(DKind) && 2763 "Expected loop-based or sections-based directive."); 2764 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2765 isOpenMPLoopDirective(DKind) 2766 ? 
OMP_IDENT_WORK_LOOP 2767 : OMP_IDENT_WORK_SECTIONS); 2768 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2769 llvm::FunctionCallee StaticInitFunction = 2770 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2771 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2772 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2773 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2774 } 2775 2776 void CGOpenMPRuntime::emitDistributeStaticInit( 2777 CodeGenFunction &CGF, SourceLocation Loc, 2778 OpenMPDistScheduleClauseKind SchedKind, 2779 const CGOpenMPRuntime::StaticRTInput &Values) { 2780 OpenMPSchedType ScheduleNum = 2781 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2782 llvm::Value *UpdatedLocation = 2783 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2784 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2785 llvm::FunctionCallee StaticInitFunction = 2786 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2787 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2788 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2789 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2790 } 2791 2792 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2793 SourceLocation Loc, 2794 OpenMPDirectiveKind DKind) { 2795 if (!CGF.HaveInsertPoint()) 2796 return; 2797 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2798 llvm::Value *Args[] = { 2799 emitUpdateLocation(CGF, Loc, 2800 isOpenMPDistributeDirective(DKind) 2801 ? OMP_IDENT_WORK_DISTRIBUTE 2802 : isOpenMPLoopDirective(DKind) 2803 ? 
OMP_IDENT_WORK_LOOP 2804 : OMP_IDENT_WORK_SECTIONS), 2805 getThreadID(CGF, Loc)}; 2806 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2807 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2808 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2809 Args); 2810 } 2811 2812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2813 SourceLocation Loc, 2814 unsigned IVSize, 2815 bool IVSigned) { 2816 if (!CGF.HaveInsertPoint()) 2817 return; 2818 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2819 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2820 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2821 } 2822 2823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2824 SourceLocation Loc, unsigned IVSize, 2825 bool IVSigned, Address IL, 2826 Address LB, Address UB, 2827 Address ST) { 2828 // Call __kmpc_dispatch_next( 2829 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2830 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2831 // kmp_int[32|64] *p_stride); 2832 llvm::Value *Args[] = { 2833 emitUpdateLocation(CGF, Loc), 2834 getThreadID(CGF, Loc), 2835 IL.getPointer(), // &isLastIter 2836 LB.getPointer(), // &Lower 2837 UB.getPointer(), // &Upper 2838 ST.getPointer() // &Stride 2839 }; 2840 llvm::Value *Call = 2841 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2842 return CGF.EmitScalarConversion( 2843 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2844 CGF.getContext().BoolTy, Loc); 2845 } 2846 2847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2848 llvm::Value *NumThreads, 2849 SourceLocation Loc) { 2850 if (!CGF.HaveInsertPoint()) 2851 return; 2852 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2853 llvm::Value *Args[] = { 2854 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2855 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2856 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2857 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2858 Args); 2859 } 2860 2861 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2862 ProcBindKind ProcBind, 2863 SourceLocation Loc) { 2864 if (!CGF.HaveInsertPoint()) 2865 return; 2866 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2867 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2868 llvm::Value *Args[] = { 2869 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2870 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2871 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2872 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2873 Args); 2874 } 2875 2876 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2877 SourceLocation Loc, llvm::AtomicOrdering AO) { 2878 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2879 OMPBuilder.CreateFlush(CGF.Builder); 2880 } else { 2881 if (!CGF.HaveInsertPoint()) 2882 return; 2883 // Build call void __kmpc_flush(ident_t *loc) 2884 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2885 CGM.getModule(), OMPRTL___kmpc_flush), 2886 emitUpdateLocation(CGF, Loc)); 2887 } 2888 } 2889 2890 namespace { 2891 /// Indexes of fields for type kmp_task_t. 2892 enum KmpTaskTFields { 2893 /// List of shared variables. 2894 KmpTaskTShareds, 2895 /// Task routine. 2896 KmpTaskTRoutine, 2897 /// Partition id for the untied tasks. 2898 KmpTaskTPartId, 2899 /// Function with call of destructors for private variables. 2900 Data1, 2901 /// Task priority. 2902 Data2, 2903 /// (Taskloops only) Lower bound. 2904 KmpTaskTLowerBound, 2905 /// (Taskloops only) Upper bound. 2906 KmpTaskTUpperBound, 2907 /// (Taskloops only) Stride. 2908 KmpTaskTStride, 2909 /// (Taskloops only) Is last iteration flag. 2910 KmpTaskTLastIter, 2911 /// (Taskloops only) Reduction data. 
2912 KmpTaskTReductions, 2913 }; 2914 } // anonymous namespace 2915 2916 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2917 return OffloadEntriesTargetRegion.empty() && 2918 OffloadEntriesDeviceGlobalVar.empty(); 2919 } 2920 2921 /// Initialize target region entry. 2922 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2923 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2924 StringRef ParentName, unsigned LineNum, 2925 unsigned Order) { 2926 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2927 "only required for the device " 2928 "code generation."); 2929 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2930 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2931 OMPTargetRegionEntryTargetRegion); 2932 ++OffloadingEntriesNum; 2933 } 2934 2935 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2936 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2937 StringRef ParentName, unsigned LineNum, 2938 llvm::Constant *Addr, llvm::Constant *ID, 2939 OMPTargetRegionEntryKind Flags) { 2940 // If we are emitting code for a target, the entry is already initialized, 2941 // only has to be registered. 
2942 if (CGM.getLangOpts().OpenMPIsDevice) { 2943 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 2944 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2945 DiagnosticsEngine::Error, 2946 "Unable to find target region on line '%0' in the device code."); 2947 CGM.getDiags().Report(DiagID) << LineNum; 2948 return; 2949 } 2950 auto &Entry = 2951 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2952 assert(Entry.isValid() && "Entry not initialized!"); 2953 Entry.setAddress(Addr); 2954 Entry.setID(ID); 2955 Entry.setFlags(Flags); 2956 } else { 2957 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2958 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2959 ++OffloadingEntriesNum; 2960 } 2961 } 2962 2963 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2964 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2965 unsigned LineNum) const { 2966 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2967 if (PerDevice == OffloadEntriesTargetRegion.end()) 2968 return false; 2969 auto PerFile = PerDevice->second.find(FileID); 2970 if (PerFile == PerDevice->second.end()) 2971 return false; 2972 auto PerParentName = PerFile->second.find(ParentName); 2973 if (PerParentName == PerFile->second.end()) 2974 return false; 2975 auto PerLine = PerParentName->second.find(LineNum); 2976 if (PerLine == PerParentName->second.end()) 2977 return false; 2978 // Fail if this entry is already registered. 2979 if (PerLine->second.getAddress() || PerLine->second.getID()) 2980 return false; 2981 return true; 2982 } 2983 2984 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2985 const OffloadTargetRegionEntryInfoActTy &Action) { 2986 // Scan all target region entries and perform the provided action. 
2987 for (const auto &D : OffloadEntriesTargetRegion) 2988 for (const auto &F : D.second) 2989 for (const auto &P : F.second) 2990 for (const auto &L : P.second) 2991 Action(D.first, F.first, P.first(), L.first, L.second); 2992 } 2993 2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2995 initializeDeviceGlobalVarEntryInfo(StringRef Name, 2996 OMPTargetGlobalVarEntryKind Flags, 2997 unsigned Order) { 2998 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2999 "only required for the device " 3000 "code generation."); 3001 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3002 ++OffloadingEntriesNum; 3003 } 3004 3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3006 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3007 CharUnits VarSize, 3008 OMPTargetGlobalVarEntryKind Flags, 3009 llvm::GlobalValue::LinkageTypes Linkage) { 3010 if (CGM.getLangOpts().OpenMPIsDevice) { 3011 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3012 assert(Entry.isValid() && Entry.getFlags() == Flags && 3013 "Entry not initialized!"); 3014 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3015 "Resetting with the new address."); 3016 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3017 if (Entry.getVarSize().isZero()) { 3018 Entry.setVarSize(VarSize); 3019 Entry.setLinkage(Linkage); 3020 } 3021 return; 3022 } 3023 Entry.setVarSize(VarSize); 3024 Entry.setLinkage(Linkage); 3025 Entry.setAddress(Addr); 3026 } else { 3027 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3028 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3029 assert(Entry.isValid() && Entry.getFlags() == Flags && 3030 "Entry not initialized!"); 3031 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3032 "Resetting with the new address."); 3033 if (Entry.getVarSize().isZero()) { 3034 Entry.setVarSize(VarSize); 3035 Entry.setLinkage(Linkage); 3036 } 3037 return; 3038 } 3039 
OffloadEntriesDeviceGlobalVar.try_emplace( 3040 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3041 ++OffloadingEntriesNum; 3042 } 3043 } 3044 3045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3046 actOnDeviceGlobalVarEntriesInfo( 3047 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3048 // Scan all target region entries and perform the provided action. 3049 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3050 Action(E.getKey(), E.getValue()); 3051 } 3052 3053 void CGOpenMPRuntime::createOffloadEntry( 3054 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3055 llvm::GlobalValue::LinkageTypes Linkage) { 3056 StringRef Name = Addr->getName(); 3057 llvm::Module &M = CGM.getModule(); 3058 llvm::LLVMContext &C = M.getContext(); 3059 3060 // Create constant string with the name. 3061 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3062 3063 std::string StringName = getName({"omp_offloading", "entry_name"}); 3064 auto *Str = new llvm::GlobalVariable( 3065 M, StrPtrInit->getType(), /*isConstant=*/true, 3066 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3067 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3068 3069 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3070 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3071 llvm::ConstantInt::get(CGM.SizeTy, Size), 3072 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3073 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3074 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3075 llvm::GlobalVariable *Entry = createGlobalStruct( 3076 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3077 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3078 3079 // The entry has to be created in the section the linker expects it to be. 
3080 Entry->setSection("omp_offloading_entries"); 3081 } 3082 3083 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3084 // Emit the offloading entries and metadata so that the device codegen side 3085 // can easily figure out what to emit. The produced metadata looks like 3086 // this: 3087 // 3088 // !omp_offload.info = !{!1, ...} 3089 // 3090 // Right now we only generate metadata for function that contain target 3091 // regions. 3092 3093 // If we are in simd mode or there are no entries, we don't need to do 3094 // anything. 3095 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3096 return; 3097 3098 llvm::Module &M = CGM.getModule(); 3099 llvm::LLVMContext &C = M.getContext(); 3100 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3101 SourceLocation, StringRef>, 3102 16> 3103 OrderedEntries(OffloadEntriesInfoManager.size()); 3104 llvm::SmallVector<StringRef, 16> ParentFunctions( 3105 OffloadEntriesInfoManager.size()); 3106 3107 // Auxiliary methods to create metadata values and strings. 3108 auto &&GetMDInt = [this](unsigned V) { 3109 return llvm::ConstantAsMetadata::get( 3110 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3111 }; 3112 3113 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3114 3115 // Create the offloading info metadata node. 3116 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3117 3118 // Create function that emits metadata for each target region entry; 3119 auto &&TargetRegionMetadataEmitter = 3120 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3121 &GetMDString]( 3122 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3123 unsigned Line, 3124 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3125 // Generate metadata for target regions. Each entry of this metadata 3126 // contains: 3127 // - Entry 0 -> Kind of this type of metadata (0). 
// - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Look for a file known to the source manager whose unique ID matches
        // (DeviceID, FileID) so that diagnostics can point at the region. Loc
        // stays default-constructed if no such file is found.
        // Note: inside this loop the iterator named E shadows the entry
        // parameter E of the enclosing lambda.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries carry no source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit one offload entry for each
  // one that is complete; incomplete entries are diagnosed or skipped.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target region entries are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3262 3263 if (!CGM.getLangOpts().OpenMPIsDevice) 3264 return; 3265 3266 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3267 return; 3268 3269 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3270 if (auto EC = Buf.getError()) { 3271 CGM.getDiags().Report(diag::err_cannot_open_file) 3272 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3273 return; 3274 } 3275 3276 llvm::LLVMContext C; 3277 auto ME = expectedToErrorOrAndEmitErrors( 3278 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3279 3280 if (auto EC = ME.getError()) { 3281 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3282 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3283 CGM.getDiags().Report(DiagID) 3284 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3285 return; 3286 } 3287 3288 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3289 if (!MD) 3290 return; 3291 3292 for (llvm::MDNode *MN : MD->operands()) { 3293 auto &&GetMDInt = [MN](unsigned Idx) { 3294 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3295 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3296 }; 3297 3298 auto &&GetMDString = [MN](unsigned Idx) { 3299 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3300 return V->getString(); 3301 }; 3302 3303 switch (GetMDInt(0)) { 3304 default: 3305 llvm_unreachable("Unexpected metadata!"); 3306 break; 3307 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3308 OffloadingEntryInfoTargetRegion: 3309 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3310 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3311 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3312 /*Order=*/GetMDInt(5)); 3313 break; 3314 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3315 OffloadingEntryInfoDeviceGlobalVar: 3316 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3317 /*MangledName=*/GetMDString(1), 3318 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3319 /*Flags=*/GetMDInt(2)), 3320 /*Order=*/GetMDInt(3)); 3321 break; 3322 } 3323 } 3324 } 3325 3326 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3327 if (!KmpRoutineEntryPtrTy) { 3328 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3329 ASTContext &C = CGM.getContext(); 3330 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3331 FunctionProtoType::ExtProtoInfo EPI; 3332 KmpRoutineEntryPtrQTy = C.getPointerType( 3333 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3334 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3335 } 3336 } 3337 3338 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3339 // Make sure the type of the entry is already created. This is the type we 3340 // have to create: 3341 // struct __tgt_offload_entry{ 3342 // void *addr; // Pointer to the offload entry info. 3343 // // (function or global) 3344 // char *name; // Name of the function or global. 3345 // size_t size; // Size of the entry info (0 if it a function). 3346 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3347 // int32_t reserved; // Reserved, to use by the runtime library. 
3348 // }; 3349 if (TgtOffloadEntryQTy.isNull()) { 3350 ASTContext &C = CGM.getContext(); 3351 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3352 RD->startDefinition(); 3353 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3354 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3355 addFieldToRecordDecl(C, RD, C.getSizeType()); 3356 addFieldToRecordDecl( 3357 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3358 addFieldToRecordDecl( 3359 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3360 RD->completeDefinition(); 3361 RD->addAttr(PackedAttr::CreateImplicit(C)); 3362 TgtOffloadEntryQTy = C.getRecordType(RD); 3363 } 3364 return TgtOffloadEntryQTy; 3365 } 3366 3367 namespace { 3368 struct PrivateHelpersTy { 3369 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3370 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3371 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3372 PrivateElemInit(PrivateElemInit) {} 3373 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3374 const Expr *OriginalRef = nullptr; 3375 const VarDecl *Original = nullptr; 3376 const VarDecl *PrivateCopy = nullptr; 3377 const VarDecl *PrivateElemInit = nullptr; 3378 bool isLocalPrivate() const { 3379 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3380 } 3381 }; 3382 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3383 } // anonymous namespace 3384 3385 static RecordDecl * 3386 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3387 if (!Privates.empty()) { 3388 ASTContext &C = CGM.getContext(); 3389 // Build struct .kmp_privates_t. 
{ 3390 // /* private vars */ 3391 // }; 3392 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3393 RD->startDefinition(); 3394 for (const auto &Pair : Privates) { 3395 const VarDecl *VD = Pair.second.Original; 3396 QualType Type = VD->getType().getNonReferenceType(); 3397 // If the private variable is a local variable with lvalue ref type, 3398 // allocate the pointer instead of the pointee type. 3399 if (Pair.second.isLocalPrivate() && 3400 VD->getType()->isLValueReferenceType()) 3401 Type = C.getPointerType(Type); 3402 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3403 if (VD->hasAttrs()) { 3404 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3405 E(VD->getAttrs().end()); 3406 I != E; ++I) 3407 FD->addAttr(*I); 3408 } 3409 } 3410 RD->completeDefinition(); 3411 return RD; 3412 } 3413 return nullptr; 3414 } 3415 3416 static RecordDecl * 3417 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3418 QualType KmpInt32Ty, 3419 QualType KmpRoutineEntryPointerQTy) { 3420 ASTContext &C = CGM.getContext(); 3421 // Build struct kmp_task_t { 3422 // void * shareds; 3423 // kmp_routine_entry_t routine; 3424 // kmp_int32 part_id; 3425 // kmp_cmplrdata_t data1; 3426 // kmp_cmplrdata_t data2; 3427 // For taskloops additional fields: 3428 // kmp_uint64 lb; 3429 // kmp_uint64 ub; 3430 // kmp_int64 st; 3431 // kmp_int32 liter; 3432 // void * reductions; 3433 // }; 3434 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3435 UD->startDefinition(); 3436 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3437 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3438 UD->completeDefinition(); 3439 QualType KmpCmplrdataTy = C.getRecordType(UD); 3440 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3441 RD->startDefinition(); 3442 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3443 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3444 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3445 addFieldToRecordDecl(C, RD, 
KmpCmplrdataTy); 3446 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3447 if (isOpenMPTaskLoopDirective(Kind)) { 3448 QualType KmpUInt64Ty = 3449 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3450 QualType KmpInt64Ty = 3451 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3452 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3453 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3454 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3455 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3456 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3457 } 3458 RD->completeDefinition(); 3459 return RD; 3460 } 3461 3462 static RecordDecl * 3463 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3464 ArrayRef<PrivateDataTy> Privates) { 3465 ASTContext &C = CGM.getContext(); 3466 // Build struct kmp_task_t_with_privates { 3467 // kmp_task_t task_data; 3468 // .kmp_privates_t. privates; 3469 // }; 3470 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3471 RD->startDefinition(); 3472 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3473 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3474 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3475 RD->completeDefinition(); 3476 return RD; 3477 } 3478 3479 /// Emit a proxy function which accepts kmp_task_t as the second 3480 /// argument. 
3481 /// \code 3482 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3483 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3484 /// For taskloops: 3485 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3486 /// tt->reductions, tt->shareds); 3487 /// return 0; 3488 /// } 3489 /// \endcode 3490 static llvm::Function * 3491 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3492 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3493 QualType KmpTaskTWithPrivatesPtrQTy, 3494 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3495 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3496 llvm::Value *TaskPrivatesMap) { 3497 ASTContext &C = CGM.getContext(); 3498 FunctionArgList Args; 3499 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3500 ImplicitParamDecl::Other); 3501 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3502 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3503 ImplicitParamDecl::Other); 3504 Args.push_back(&GtidArg); 3505 Args.push_back(&TaskTypeArg); 3506 const auto &TaskEntryFnInfo = 3507 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3508 llvm::FunctionType *TaskEntryTy = 3509 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3510 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3511 auto *TaskEntry = llvm::Function::Create( 3512 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3513 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3514 TaskEntry->setDoesNotRecurse(); 3515 CodeGenFunction CGF(CGM); 3516 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3517 Loc, Loc); 3518 3519 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3520 // tt, 3521 // For taskloops: 3522 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3523 // 
tt->task_data.shareds); 3524 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3525 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3526 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3527 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3528 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3529 const auto *KmpTaskTWithPrivatesQTyRD = 3530 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3531 LValue Base = 3532 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3533 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3534 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3535 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3536 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3537 3538 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3539 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3540 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3541 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3542 CGF.ConvertTypeForMem(SharedsPtrTy)); 3543 3544 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3545 llvm::Value *PrivatesParam; 3546 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3547 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3548 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3549 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3550 } else { 3551 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3552 } 3553 3554 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3555 TaskPrivatesMap, 3556 CGF.Builder 3557 .CreatePointerBitCastOrAddrSpaceCast( 3558 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3559 .getPointer()}; 3560 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3561 std::end(CommonArgs)); 3562 if (isOpenMPTaskLoopDirective(Kind)) { 3563 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3564 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3565 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3566 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3567 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3568 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3569 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3570 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3571 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3572 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3573 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3574 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3575 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3576 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3577 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3578 CallArgs.push_back(LBParam); 3579 CallArgs.push_back(UBParam); 3580 CallArgs.push_back(StParam); 3581 CallArgs.push_back(LIParam); 3582 CallArgs.push_back(RParam); 3583 } 3584 CallArgs.push_back(SharedsParam); 3585 3586 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3587 CallArgs); 3588 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3589 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3590 CGF.FinishFunction(); 3591 return TaskEntry; 3592 } 3593 3594 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3595 SourceLocation Loc, 3596 QualType KmpInt32Ty, 3597 QualType KmpTaskTWithPrivatesPtrQTy, 3598 QualType KmpTaskTWithPrivatesQTy) { 3599 ASTContext &C = CGM.getContext(); 3600 FunctionArgList Args; 3601 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3602 ImplicitParamDecl::Other); 3603 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3604 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3605 
ImplicitParamDecl::Other); 3606 Args.push_back(&GtidArg); 3607 Args.push_back(&TaskTypeArg); 3608 const auto &DestructorFnInfo = 3609 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3610 llvm::FunctionType *DestructorFnTy = 3611 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3612 std::string Name = 3613 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3614 auto *DestructorFn = 3615 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3616 Name, &CGM.getModule()); 3617 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3618 DestructorFnInfo); 3619 DestructorFn->setDoesNotRecurse(); 3620 CodeGenFunction CGF(CGM); 3621 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3622 Args, Loc, Loc); 3623 3624 LValue Base = CGF.EmitLoadOfPointerLValue( 3625 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3626 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3627 const auto *KmpTaskTWithPrivatesQTyRD = 3628 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3629 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3630 Base = CGF.EmitLValueForField(Base, *FI); 3631 for (const auto *Field : 3632 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3633 if (QualType::DestructionKind DtorKind = 3634 Field->getType().isDestructedType()) { 3635 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3636 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3637 } 3638 } 3639 CGF.FinishFunction(); 3640 return DestructorFn; 3641 } 3642 3643 /// Emit a privates mapping function for correct handling of private and 3644 /// firstprivate variables. 3645 /// \code 3646 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3647 /// **noalias priv1,..., <tyn> **noalias privn) { 3648 /// *priv1 = &.privates.priv1; 3649 /// ...; 3650 /// *privn = &.privates.privn; 3651 /// } 3652 /// \endcode 3653 static llvm::Value * 3654 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3655 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3656 ArrayRef<PrivateDataTy> Privates) { 3657 ASTContext &C = CGM.getContext(); 3658 FunctionArgList Args; 3659 ImplicitParamDecl TaskPrivatesArg( 3660 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3661 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3662 ImplicitParamDecl::Other); 3663 Args.push_back(&TaskPrivatesArg); 3664 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3665 unsigned Counter = 1; 3666 for (const Expr *E : Data.PrivateVars) { 3667 Args.push_back(ImplicitParamDecl::Create( 3668 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3669 C.getPointerType(C.getPointerType(E->getType())) 3670 .withConst() 3671 .withRestrict(), 3672 ImplicitParamDecl::Other)); 3673 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3674 PrivateVarsPos[VD] = Counter; 3675 ++Counter; 3676 } 3677 for (const Expr *E : Data.FirstprivateVars) { 3678 Args.push_back(ImplicitParamDecl::Create( 3679 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3680 C.getPointerType(C.getPointerType(E->getType())) 3681 .withConst() 3682 .withRestrict(), 3683 ImplicitParamDecl::Other)); 3684 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3685 PrivateVarsPos[VD] = Counter; 3686 ++Counter; 3687 } 3688 for (const Expr *E : Data.LastprivateVars) { 3689 Args.push_back(ImplicitParamDecl::Create( 3690 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3691 C.getPointerType(C.getPointerType(E->getType())) 3692 .withConst() 3693 .withRestrict(), 3694 ImplicitParamDecl::Other)); 3695 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3696 PrivateVarsPos[VD] = Counter; 3697 ++Counter; 3698 } 3699 for (const VarDecl *VD : 
Data.PrivateLocals) { 3700 QualType Ty = VD->getType().getNonReferenceType(); 3701 if (VD->getType()->isLValueReferenceType()) 3702 Ty = C.getPointerType(Ty); 3703 Args.push_back(ImplicitParamDecl::Create( 3704 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3705 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3706 ImplicitParamDecl::Other)); 3707 PrivateVarsPos[VD] = Counter; 3708 ++Counter; 3709 } 3710 const auto &TaskPrivatesMapFnInfo = 3711 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3712 llvm::FunctionType *TaskPrivatesMapTy = 3713 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3714 std::string Name = 3715 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3716 auto *TaskPrivatesMap = llvm::Function::Create( 3717 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3718 &CGM.getModule()); 3719 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3720 TaskPrivatesMapFnInfo); 3721 if (CGM.getLangOpts().Optimize) { 3722 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3723 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3724 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3725 } 3726 CodeGenFunction CGF(CGM); 3727 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3728 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3729 3730 // *privi = &.privates.privi; 3731 LValue Base = CGF.EmitLoadOfPointerLValue( 3732 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3733 TaskPrivatesArg.getType()->castAs<PointerType>()); 3734 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3735 Counter = 0; 3736 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3737 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3738 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3739 LValue RefLVal = 3740 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3741 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3742 
RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3743 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3744 ++Counter; 3745 } 3746 CGF.FinishFunction(); 3747 return TaskPrivatesMap; 3748 } 3749 3750 /// Emit initialization for private variables in task-based directives. 3751 static void emitPrivatesInit(CodeGenFunction &CGF, 3752 const OMPExecutableDirective &D, 3753 Address KmpTaskSharedsPtr, LValue TDBase, 3754 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3755 QualType SharedsTy, QualType SharedsPtrTy, 3756 const OMPTaskDataTy &Data, 3757 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3758 ASTContext &C = CGF.getContext(); 3759 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3760 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3761 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3762 ? OMPD_taskloop 3763 : OMPD_task; 3764 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3765 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3766 LValue SrcBase; 3767 bool IsTargetTask = 3768 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3769 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3770 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 3771 // PointersArray and SizesArray. The original variables for these arrays are 3772 // not captured and we get their addresses explicitly. 3773 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3774 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3775 SrcBase = CGF.MakeAddrLValue( 3776 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3777 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3778 SharedsTy); 3779 } 3780 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3781 for (const PrivateDataTy &Pair : Privates) { 3782 // Do not initialize private locals. 
3783 if (Pair.second.isLocalPrivate()) 3784 continue; 3785 const VarDecl *VD = Pair.second.PrivateCopy; 3786 const Expr *Init = VD->getAnyInitializer(); 3787 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3788 !CGF.isTrivialInitializer(Init)))) { 3789 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3790 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3791 const VarDecl *OriginalVD = Pair.second.Original; 3792 // Check if the variable is the target-based BasePointersArray, 3793 // PointersArray or SizesArray. 3794 LValue SharedRefLValue; 3795 QualType Type = PrivateLValue.getType(); 3796 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3797 if (IsTargetTask && !SharedField) { 3798 assert(isa<ImplicitParamDecl>(OriginalVD) && 3799 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3800 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3801 ->getNumParams() == 0 && 3802 isa<TranslationUnitDecl>( 3803 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3804 ->getDeclContext()) && 3805 "Expected artificial target data variable."); 3806 SharedRefLValue = 3807 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3808 } else if (ForDup) { 3809 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3810 SharedRefLValue = CGF.MakeAddrLValue( 3811 Address(SharedRefLValue.getPointer(CGF), 3812 C.getDeclAlign(OriginalVD)), 3813 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3814 SharedRefLValue.getTBAAInfo()); 3815 } else if (CGF.LambdaCaptureFields.count( 3816 Pair.second.Original->getCanonicalDecl()) > 0 || 3817 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3818 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3819 } else { 3820 // Processing for implicitly captured variables. 
3821 InlinedOpenMPRegionRAII Region( 3822 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3823 /*HasCancel=*/false); 3824 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3825 } 3826 if (Type->isArrayType()) { 3827 // Initialize firstprivate array. 3828 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3829 // Perform simple memcpy. 3830 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3831 } else { 3832 // Initialize firstprivate array using element-by-element 3833 // initialization. 3834 CGF.EmitOMPAggregateAssign( 3835 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3836 Type, 3837 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3838 Address SrcElement) { 3839 // Clean up any temporaries needed by the initialization. 3840 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3841 InitScope.addPrivate( 3842 Elem, [SrcElement]() -> Address { return SrcElement; }); 3843 (void)InitScope.Privatize(); 3844 // Emit initialization for single element. 3845 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3846 CGF, &CapturesInfo); 3847 CGF.EmitAnyExprToMem(Init, DestElement, 3848 Init->getType().getQualifiers(), 3849 /*IsInitializer=*/false); 3850 }); 3851 } 3852 } else { 3853 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3854 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3855 return SharedRefLValue.getAddress(CGF); 3856 }); 3857 (void)InitScope.Privatize(); 3858 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3859 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3860 /*capturedByInit=*/false); 3861 } 3862 } else { 3863 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3864 } 3865 } 3866 ++FI; 3867 } 3868 } 3869 3870 /// Check if duplication function is required for taskloops. 
3871 static bool checkInitIsRequired(CodeGenFunction &CGF, 3872 ArrayRef<PrivateDataTy> Privates) { 3873 bool InitRequired = false; 3874 for (const PrivateDataTy &Pair : Privates) { 3875 if (Pair.second.isLocalPrivate()) 3876 continue; 3877 const VarDecl *VD = Pair.second.PrivateCopy; 3878 const Expr *Init = VD->getAnyInitializer(); 3879 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3880 !CGF.isTrivialInitializer(Init)); 3881 if (InitRequired) 3882 break; 3883 } 3884 return InitRequired; 3885 } 3886 3887 3888 /// Emit task_dup function (for initialization of 3889 /// private/firstprivate/lastprivate vars and last_iter flag) 3890 /// \code 3891 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3892 /// lastpriv) { 3893 /// // setup lastprivate flag 3894 /// task_dst->last = lastpriv; 3895 /// // could be constructor calls here... 3896 /// } 3897 /// \endcode 3898 static llvm::Value * 3899 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3900 const OMPExecutableDirective &D, 3901 QualType KmpTaskTWithPrivatesPtrQTy, 3902 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3903 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3904 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3905 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3906 ASTContext &C = CGM.getContext(); 3907 FunctionArgList Args; 3908 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3909 KmpTaskTWithPrivatesPtrQTy, 3910 ImplicitParamDecl::Other); 3911 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3912 KmpTaskTWithPrivatesPtrQTy, 3913 ImplicitParamDecl::Other); 3914 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3915 ImplicitParamDecl::Other); 3916 Args.push_back(&DstArg); 3917 Args.push_back(&SrcArg); 3918 Args.push_back(&LastprivArg); 3919 const auto &TaskDupFnInfo = 3920 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3921 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3922 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3923 auto *TaskDup = llvm::Function::Create( 3924 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3925 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3926 TaskDup->setDoesNotRecurse(); 3927 CodeGenFunction CGF(CGM); 3928 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3929 Loc); 3930 3931 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3932 CGF.GetAddrOfLocalVar(&DstArg), 3933 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3934 // task_dst->liter = lastpriv; 3935 if (WithLastIter) { 3936 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3937 LValue Base = CGF.EmitLValueForField( 3938 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3939 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3940 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3941 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3942 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3943 } 3944 3945 // Emit initial values for private copies (if any). 
3946 assert(!Privates.empty()); 3947 Address KmpTaskSharedsPtr = Address::invalid(); 3948 if (!Data.FirstprivateVars.empty()) { 3949 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3950 CGF.GetAddrOfLocalVar(&SrcArg), 3951 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3952 LValue Base = CGF.EmitLValueForField( 3953 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3954 KmpTaskSharedsPtr = Address( 3955 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3956 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3957 KmpTaskTShareds)), 3958 Loc), 3959 CGM.getNaturalTypeAlignment(SharedsTy)); 3960 } 3961 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3962 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3963 CGF.FinishFunction(); 3964 return TaskDup; 3965 } 3966 3967 /// Checks if destructor function is required to be generated. 3968 /// \return true if cleanups are required, false otherwise. 3969 static bool 3970 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3971 ArrayRef<PrivateDataTy> Privates) { 3972 for (const PrivateDataTy &P : Privates) { 3973 if (P.second.isLocalPrivate()) 3974 continue; 3975 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3976 if (Ty.isDestructedType()) 3977 return true; 3978 } 3979 return false; 3980 } 3981 3982 namespace { 3983 /// Loop generator for OpenMP iterator expression. 
3984 class OMPIteratorGeneratorScope final 3985 : public CodeGenFunction::OMPPrivateScope { 3986 CodeGenFunction &CGF; 3987 const OMPIteratorExpr *E = nullptr; 3988 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3989 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3990 OMPIteratorGeneratorScope() = delete; 3991 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3992 3993 public: 3994 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 3995 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 3996 if (!E) 3997 return; 3998 SmallVector<llvm::Value *, 4> Uppers; 3999 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4000 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4001 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4002 addPrivate(VD, [&CGF, VD]() { 4003 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4004 }); 4005 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4006 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4007 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4008 "counter.addr"); 4009 }); 4010 } 4011 Privatize(); 4012 4013 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4014 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4015 LValue CLVal = 4016 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4017 HelperData.CounterVD->getType()); 4018 // Counter = 0; 4019 CGF.EmitStoreOfScalar( 4020 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4021 CLVal); 4022 CodeGenFunction::JumpDest &ContDest = 4023 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4024 CodeGenFunction::JumpDest &ExitDest = 4025 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4026 // N = <number-of_iterations>; 4027 llvm::Value *N = Uppers[I]; 4028 // cont: 4029 // if (Counter < N) goto body; else goto exit; 4030 CGF.EmitBlock(ContDest.getBlock()); 4031 auto *CVal = 4032 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4033 llvm::Value *Cmp = 4034 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4035 ? CGF.Builder.CreateICmpSLT(CVal, N) 4036 : CGF.Builder.CreateICmpULT(CVal, N); 4037 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4038 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4039 // body: 4040 CGF.EmitBlock(BodyBB); 4041 // Iteri = Begini + Counter * Stepi; 4042 CGF.EmitIgnoredExpr(HelperData.Update); 4043 } 4044 } 4045 ~OMPIteratorGeneratorScope() { 4046 if (!E) 4047 return; 4048 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4049 // Counter = Counter + 1; 4050 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4051 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4052 // goto cont; 4053 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4054 // exit: 4055 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4056 } 4057 } 4058 }; 4059 } // namespace 4060 4061 static std::pair<llvm::Value *, llvm::Value *> 4062 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4063 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4064 llvm::Value *Addr; 4065 if (OASE) { 4066 const Expr *Base = OASE->getBase(); 4067 Addr = CGF.EmitScalarExpr(Base); 4068 } else { 4069 Addr = CGF.EmitLValue(E).getPointer(CGF); 4070 } 4071 llvm::Value *SizeVal; 4072 QualType Ty = E->getType(); 4073 if (OASE) { 4074 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4075 for (const Expr *SE : OASE->getDimensions()) { 4076 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4077 Sz = CGF.EmitScalarConversion( 4078 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4079 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4080 } 4081 } else if (const auto *ASE = 4082 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4083 LValue UpAddrLVal = 4084 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4085 llvm::Value *UpAddr = 
4086 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4087 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4088 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4089 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4090 } else { 4091 SizeVal = CGF.getTypeSize(Ty); 4092 } 4093 return std::make_pair(Addr, SizeVal); 4094 } 4095 4096 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4097 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4098 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4099 if (KmpTaskAffinityInfoTy.isNull()) { 4100 RecordDecl *KmpAffinityInfoRD = 4101 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4102 KmpAffinityInfoRD->startDefinition(); 4103 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4104 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4105 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4106 KmpAffinityInfoRD->completeDefinition(); 4107 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4108 } 4109 } 4110 4111 CGOpenMPRuntime::TaskResultTy 4112 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4113 const OMPExecutableDirective &D, 4114 llvm::Function *TaskFunction, QualType SharedsTy, 4115 Address Shareds, const OMPTaskDataTy &Data) { 4116 ASTContext &C = CGM.getContext(); 4117 llvm::SmallVector<PrivateDataTy, 4> Privates; 4118 // Aggregate privates and sort them by the alignment. 
4119 const auto *I = Data.PrivateCopies.begin(); 4120 for (const Expr *E : Data.PrivateVars) { 4121 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4122 Privates.emplace_back( 4123 C.getDeclAlign(VD), 4124 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4125 /*PrivateElemInit=*/nullptr)); 4126 ++I; 4127 } 4128 I = Data.FirstprivateCopies.begin(); 4129 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4130 for (const Expr *E : Data.FirstprivateVars) { 4131 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4132 Privates.emplace_back( 4133 C.getDeclAlign(VD), 4134 PrivateHelpersTy( 4135 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4136 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4137 ++I; 4138 ++IElemInitRef; 4139 } 4140 I = Data.LastprivateCopies.begin(); 4141 for (const Expr *E : Data.LastprivateVars) { 4142 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4143 Privates.emplace_back( 4144 C.getDeclAlign(VD), 4145 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4146 /*PrivateElemInit=*/nullptr)); 4147 ++I; 4148 } 4149 for (const VarDecl *VD : Data.PrivateLocals) 4150 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4151 llvm::stable_sort(Privates, 4152 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4153 return L.first > R.first; 4154 }); 4155 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4156 // Build type kmp_routine_entry_t (if not built yet). 4157 emitKmpRoutineEntryT(KmpInt32Ty); 4158 // Build type kmp_task_t (if not built yet). 
4159 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4160 if (SavedKmpTaskloopTQTy.isNull()) { 4161 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4162 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4163 } 4164 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4165 } else { 4166 assert((D.getDirectiveKind() == OMPD_task || 4167 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4168 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4169 "Expected taskloop, task or target directive"); 4170 if (SavedKmpTaskTQTy.isNull()) { 4171 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4172 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4173 } 4174 KmpTaskTQTy = SavedKmpTaskTQTy; 4175 } 4176 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4177 // Build particular struct kmp_task_t for the given task. 4178 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4179 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4180 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4181 QualType KmpTaskTWithPrivatesPtrQTy = 4182 C.getPointerType(KmpTaskTWithPrivatesQTy); 4183 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4184 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4185 KmpTaskTWithPrivatesTy->getPointerTo(); 4186 llvm::Value *KmpTaskTWithPrivatesTySize = 4187 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4188 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4189 4190 // Emit initial values for private copies (if any). 
4191 llvm::Value *TaskPrivatesMap = nullptr; 4192 llvm::Type *TaskPrivatesMapTy = 4193 std::next(TaskFunction->arg_begin(), 3)->getType(); 4194 if (!Privates.empty()) { 4195 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4196 TaskPrivatesMap = 4197 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4198 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4199 TaskPrivatesMap, TaskPrivatesMapTy); 4200 } else { 4201 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4202 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4203 } 4204 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4205 // kmp_task_t *tt); 4206 llvm::Function *TaskEntry = emitProxyTaskFunction( 4207 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4208 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4209 TaskPrivatesMap); 4210 4211 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4212 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4213 // kmp_routine_entry_t *task_entry); 4214 // Task flags. Format is taken from 4215 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4216 // description of kmp_tasking_flags struct. 4217 enum { 4218 TiedFlag = 0x1, 4219 FinalFlag = 0x2, 4220 DestructorsFlag = 0x8, 4221 PriorityFlag = 0x20, 4222 DetachableFlag = 0x40, 4223 }; 4224 unsigned Flags = Data.Tied ? TiedFlag : 0; 4225 bool NeedsCleanup = false; 4226 if (!Privates.empty()) { 4227 NeedsCleanup = 4228 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4229 if (NeedsCleanup) 4230 Flags = Flags | DestructorsFlag; 4231 } 4232 if (Data.Priority.getInt()) 4233 Flags = Flags | PriorityFlag; 4234 if (D.hasClausesOfKind<OMPDetachClause>()) 4235 Flags = Flags | DetachableFlag; 4236 llvm::Value *TaskFlags = 4237 Data.Final.getPointer() 4238 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4239 CGF.Builder.getInt32(FinalFlag), 4240 CGF.Builder.getInt32(/*C=*/0)) 4241 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4242 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4243 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4244 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4245 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4246 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4247 TaskEntry, KmpRoutineEntryPtrTy)}; 4248 llvm::Value *NewTask; 4249 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4250 // Check if we have any device clause associated with the directive. 4251 const Expr *Device = nullptr; 4252 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4253 Device = C->getDevice(); 4254 // Emit device ID if any otherwise use default value. 4255 llvm::Value *DeviceID; 4256 if (Device) 4257 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4258 CGF.Int64Ty, /*isSigned=*/true); 4259 else 4260 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4261 AllocArgs.push_back(DeviceID); 4262 NewTask = CGF.EmitRuntimeCall( 4263 OMPBuilder.getOrCreateRuntimeFunction( 4264 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4265 AllocArgs); 4266 } else { 4267 NewTask = 4268 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4269 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4270 AllocArgs); 4271 } 4272 // Emit detach clause initialization. 
4273 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4274 // task_descriptor); 4275 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4276 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4277 LValue EvtLVal = CGF.EmitLValue(Evt); 4278 4279 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4280 // int gtid, kmp_task_t *task); 4281 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4282 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4283 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4284 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4285 OMPBuilder.getOrCreateRuntimeFunction( 4286 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4287 {Loc, Tid, NewTask}); 4288 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4289 Evt->getExprLoc()); 4290 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4291 } 4292 // Process affinity clauses. 4293 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4294 // Process list of affinity data. 4295 ASTContext &C = CGM.getContext(); 4296 Address AffinitiesArray = Address::invalid(); 4297 // Calculate number of elements to form the array of affinity data. 4298 llvm::Value *NumOfElements = nullptr; 4299 unsigned NumAffinities = 0; 4300 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4301 if (const Expr *Modifier = C->getModifier()) { 4302 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4303 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4304 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4305 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4306 NumOfElements = 4307 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4308 } 4309 } else { 4310 NumAffinities += C->varlist_size(); 4311 } 4312 } 4313 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4314 // Fields ids in kmp_task_affinity_info record. 
4315 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4316 4317 QualType KmpTaskAffinityInfoArrayTy; 4318 if (NumOfElements) { 4319 NumOfElements = CGF.Builder.CreateNUWAdd( 4320 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4321 OpaqueValueExpr OVE( 4322 Loc, 4323 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4324 VK_RValue); 4325 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4326 RValue::get(NumOfElements)); 4327 KmpTaskAffinityInfoArrayTy = 4328 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4329 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4330 // Properly emit variable-sized array. 4331 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4332 ImplicitParamDecl::Other); 4333 CGF.EmitVarDecl(*PD); 4334 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4335 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4336 /*isSigned=*/false); 4337 } else { 4338 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4339 KmpTaskAffinityInfoTy, 4340 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4341 ArrayType::Normal, /*IndexTypeQuals=*/0); 4342 AffinitiesArray = 4343 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4344 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4345 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4346 /*isSigned=*/false); 4347 } 4348 4349 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4350 // Fill array by elements without iterators. 
4351 unsigned Pos = 0; 4352 bool HasIterator = false; 4353 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4354 if (C->getModifier()) { 4355 HasIterator = true; 4356 continue; 4357 } 4358 for (const Expr *E : C->varlists()) { 4359 llvm::Value *Addr; 4360 llvm::Value *Size; 4361 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4362 LValue Base = 4363 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4364 KmpTaskAffinityInfoTy); 4365 // affs[i].base_addr = &<Affinities[i].second>; 4366 LValue BaseAddrLVal = CGF.EmitLValueForField( 4367 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4368 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4369 BaseAddrLVal); 4370 // affs[i].len = sizeof(<Affinities[i].second>); 4371 LValue LenLVal = CGF.EmitLValueForField( 4372 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4373 CGF.EmitStoreOfScalar(Size, LenLVal); 4374 ++Pos; 4375 } 4376 } 4377 LValue PosLVal; 4378 if (HasIterator) { 4379 PosLVal = CGF.MakeAddrLValue( 4380 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4381 C.getSizeType()); 4382 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4383 } 4384 // Process elements with iterators. 
4385 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4386 const Expr *Modifier = C->getModifier(); 4387 if (!Modifier) 4388 continue; 4389 OMPIteratorGeneratorScope IteratorScope( 4390 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4391 for (const Expr *E : C->varlists()) { 4392 llvm::Value *Addr; 4393 llvm::Value *Size; 4394 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4395 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4396 LValue Base = CGF.MakeAddrLValue( 4397 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4398 AffinitiesArray.getAlignment()), 4399 KmpTaskAffinityInfoTy); 4400 // affs[i].base_addr = &<Affinities[i].second>; 4401 LValue BaseAddrLVal = CGF.EmitLValueForField( 4402 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4403 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4404 BaseAddrLVal); 4405 // affs[i].len = sizeof(<Affinities[i].second>); 4406 LValue LenLVal = CGF.EmitLValueForField( 4407 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4408 CGF.EmitStoreOfScalar(Size, LenLVal); 4409 Idx = CGF.Builder.CreateNUWAdd( 4410 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4411 CGF.EmitStoreOfScalar(Idx, PosLVal); 4412 } 4413 } 4414 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4415 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4416 // naffins, kmp_task_affinity_info_t *affin_list); 4417 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4418 llvm::Value *GTid = getThreadID(CGF, Loc); 4419 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4420 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4421 // FIXME: Emit the function and ignore its result for now unless the 4422 // runtime function is properly implemented. 
4423 (void)CGF.EmitRuntimeCall( 4424 OMPBuilder.getOrCreateRuntimeFunction( 4425 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4426 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4427 } 4428 llvm::Value *NewTaskNewTaskTTy = 4429 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4430 NewTask, KmpTaskTWithPrivatesPtrTy); 4431 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4432 KmpTaskTWithPrivatesQTy); 4433 LValue TDBase = 4434 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4435 // Fill the data in the resulting kmp_task_t record. 4436 // Copy shareds if there are any. 4437 Address KmpTaskSharedsPtr = Address::invalid(); 4438 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4439 KmpTaskSharedsPtr = 4440 Address(CGF.EmitLoadOfScalar( 4441 CGF.EmitLValueForField( 4442 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4443 KmpTaskTShareds)), 4444 Loc), 4445 CGM.getNaturalTypeAlignment(SharedsTy)); 4446 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4447 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4448 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4449 } 4450 // Emit initial values for private copies (if any). 4451 TaskResultTy Result; 4452 if (!Privates.empty()) { 4453 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4454 SharedsTy, SharedsPtrTy, Data, Privates, 4455 /*ForDup=*/false); 4456 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4457 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4458 Result.TaskDupFn = emitTaskDupFunction( 4459 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4460 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4461 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4462 } 4463 } 4464 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4465 enum { Priority = 0, Destructors = 1 }; 4466 // Provide pointer to function with destructors for privates. 4467 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4468 const RecordDecl *KmpCmplrdataUD = 4469 (*FI)->getType()->getAsUnionType()->getDecl(); 4470 if (NeedsCleanup) { 4471 llvm::Value *DestructorFn = emitDestructorsFunction( 4472 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4473 KmpTaskTWithPrivatesQTy); 4474 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4475 LValue DestructorsLV = CGF.EmitLValueForField( 4476 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4477 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4478 DestructorFn, KmpRoutineEntryPtrTy), 4479 DestructorsLV); 4480 } 4481 // Set priority. 4482 if (Data.Priority.getInt()) { 4483 LValue Data2LV = CGF.EmitLValueForField( 4484 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4485 LValue PriorityLV = CGF.EmitLValueForField( 4486 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4487 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4488 } 4489 Result.NewTask = NewTask; 4490 Result.TaskEntry = TaskEntry; 4491 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4492 Result.TDBase = TDBase; 4493 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4494 return Result; 4495 } 4496 4497 namespace { 4498 /// Dependence kind for RTL. 4499 enum RTLDependenceKindTy { 4500 DepIn = 0x01, 4501 DepInOut = 0x3, 4502 DepMutexInOutSet = 0x4 4503 }; 4504 /// Fields ids in kmp_depend_info record. 4505 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4506 } // namespace 4507 4508 /// Translates internal dependency kind into the runtime kind. 4509 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4510 RTLDependenceKindTy DepKind; 4511 switch (K) { 4512 case OMPC_DEPEND_in: 4513 DepKind = DepIn; 4514 break; 4515 // Out and InOut dependencies must use the same code. 
4516 case OMPC_DEPEND_out: 4517 case OMPC_DEPEND_inout: 4518 DepKind = DepInOut; 4519 break; 4520 case OMPC_DEPEND_mutexinoutset: 4521 DepKind = DepMutexInOutSet; 4522 break; 4523 case OMPC_DEPEND_source: 4524 case OMPC_DEPEND_sink: 4525 case OMPC_DEPEND_depobj: 4526 case OMPC_DEPEND_unknown: 4527 llvm_unreachable("Unknown task dependence type"); 4528 } 4529 return DepKind; 4530 } 4531 4532 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4533 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4534 QualType &FlagsTy) { 4535 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4536 if (KmpDependInfoTy.isNull()) { 4537 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4538 KmpDependInfoRD->startDefinition(); 4539 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4540 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4541 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4542 KmpDependInfoRD->completeDefinition(); 4543 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4544 } 4545 } 4546 4547 std::pair<llvm::Value *, LValue> 4548 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4549 SourceLocation Loc) { 4550 ASTContext &C = CGM.getContext(); 4551 QualType FlagsTy; 4552 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4553 RecordDecl *KmpDependInfoRD = 4554 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4555 LValue Base = CGF.EmitLoadOfPointerLValue( 4556 DepobjLVal.getAddress(CGF), 4557 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4558 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4559 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4560 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4561 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4562 Base.getTBAAInfo()); 4563 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4564 Addr.getPointer(), 4565 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4566 LValue NumDepsBase = CGF.MakeAddrLValue( 4567 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4568 Base.getBaseInfo(), Base.getTBAAInfo()); 4569 // NumDeps = deps[i].base_addr; 4570 LValue BaseAddrLVal = CGF.EmitLValueForField( 4571 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4572 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4573 return std::make_pair(NumDeps, Base); 4574 } 4575 4576 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4577 llvm::PointerUnion<unsigned *, LValue *> Pos, 4578 const OMPTaskDataTy::DependData &Data, 4579 Address DependenciesArray) { 4580 CodeGenModule &CGM = CGF.CGM; 4581 ASTContext &C = CGM.getContext(); 4582 QualType FlagsTy; 4583 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4584 RecordDecl *KmpDependInfoRD = 4585 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4586 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4587 4588 OMPIteratorGeneratorScope IteratorScope( 4589 CGF, cast_or_null<OMPIteratorExpr>( 4590 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4591 : nullptr)); 4592 for (const Expr *E : Data.DepExprs) { 4593 llvm::Value *Addr; 4594 llvm::Value *Size; 4595 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4596 LValue Base; 4597 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4598 Base = CGF.MakeAddrLValue( 4599 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4600 } else { 4601 LValue &PosLVal = *Pos.get<LValue *>(); 4602 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4603 Base = CGF.MakeAddrLValue( 4604 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4605 DependenciesArray.getAlignment()), 4606 KmpDependInfoTy); 4607 } 4608 // deps[i].base_addr = &<Dependencies[i].second>; 4609 LValue BaseAddrLVal = CGF.EmitLValueForField( 4610 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4611 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4612 BaseAddrLVal); 4613 // deps[i].len = sizeof(<Dependencies[i].second>); 4614 LValue LenLVal = CGF.EmitLValueForField( 4615 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4616 CGF.EmitStoreOfScalar(Size, LenLVal); 4617 // deps[i].flags = <Dependencies[i].first>; 4618 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4619 LValue FlagsLVal = CGF.EmitLValueForField( 4620 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4621 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4622 FlagsLVal); 4623 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4624 ++(*P); 4625 } else { 4626 LValue &PosLVal = *Pos.get<LValue *>(); 4627 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4628 Idx = CGF.Builder.CreateNUWAdd(Idx, 4629 llvm::ConstantInt::get(Idx->getType(), 1)); 4630 CGF.EmitStoreOfScalar(Idx, PosLVal); 4631 } 4632 } 4633 } 4634 4635 static SmallVector<llvm::Value *, 4> 4636 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4637 const OMPTaskDataTy::DependData 
&Data) { 4638 assert(Data.DepKind == OMPC_DEPEND_depobj && 4639 "Expected depobj dependecy kind."); 4640 SmallVector<llvm::Value *, 4> Sizes; 4641 SmallVector<LValue, 4> SizeLVals; 4642 ASTContext &C = CGF.getContext(); 4643 QualType FlagsTy; 4644 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4645 RecordDecl *KmpDependInfoRD = 4646 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4647 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4648 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4649 { 4650 OMPIteratorGeneratorScope IteratorScope( 4651 CGF, cast_or_null<OMPIteratorExpr>( 4652 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4653 : nullptr)); 4654 for (const Expr *E : Data.DepExprs) { 4655 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4656 LValue Base = CGF.EmitLoadOfPointerLValue( 4657 DepobjLVal.getAddress(CGF), 4658 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4659 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4660 Base.getAddress(CGF), KmpDependInfoPtrT); 4661 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4662 Base.getTBAAInfo()); 4663 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4664 Addr.getPointer(), 4665 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4666 LValue NumDepsBase = CGF.MakeAddrLValue( 4667 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4668 Base.getBaseInfo(), Base.getTBAAInfo()); 4669 // NumDeps = deps[i].base_addr; 4670 LValue BaseAddrLVal = CGF.EmitLValueForField( 4671 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4672 llvm::Value *NumDeps = 4673 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4674 LValue NumLVal = CGF.MakeAddrLValue( 4675 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4676 C.getUIntPtrType()); 4677 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4678 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4679 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4680 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4681 CGF.EmitStoreOfScalar(Add, NumLVal); 4682 SizeLVals.push_back(NumLVal); 4683 } 4684 } 4685 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4686 llvm::Value *Size = 4687 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4688 Sizes.push_back(Size); 4689 } 4690 return Sizes; 4691 } 4692 4693 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4694 LValue PosLVal, 4695 const OMPTaskDataTy::DependData &Data, 4696 Address DependenciesArray) { 4697 assert(Data.DepKind == OMPC_DEPEND_depobj && 4698 "Expected depobj dependecy kind."); 4699 ASTContext &C = CGF.getContext(); 4700 QualType FlagsTy; 4701 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4702 RecordDecl *KmpDependInfoRD = 4703 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4704 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4705 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4706 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4707 { 4708 OMPIteratorGeneratorScope IteratorScope( 4709 CGF, cast_or_null<OMPIteratorExpr>( 4710 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4711 : nullptr)); 4712 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4713 const Expr *E = Data.DepExprs[I]; 4714 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4715 LValue Base = CGF.EmitLoadOfPointerLValue( 4716 DepobjLVal.getAddress(CGF), 4717 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4718 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4719 Base.getAddress(CGF), KmpDependInfoPtrT); 4720 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4721 Base.getTBAAInfo()); 4722 4723 // Get number of elements in a single depobj. 
4724 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4725 Addr.getPointer(), 4726 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4727 LValue NumDepsBase = CGF.MakeAddrLValue( 4728 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4729 Base.getBaseInfo(), Base.getTBAAInfo()); 4730 // NumDeps = deps[i].base_addr; 4731 LValue BaseAddrLVal = CGF.EmitLValueForField( 4732 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4733 llvm::Value *NumDeps = 4734 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4735 4736 // memcopy dependency data. 4737 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4738 ElSize, 4739 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4740 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4741 Address DepAddr = 4742 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4743 DependenciesArray.getAlignment()); 4744 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4745 4746 // Increase pos. 4747 // pos += size; 4748 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4749 CGF.EmitStoreOfScalar(Add, PosLVal); 4750 } 4751 } 4752 } 4753 4754 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4755 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4756 SourceLocation Loc) { 4757 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4758 return D.DepExprs.empty(); 4759 })) 4760 return std::make_pair(nullptr, Address::invalid()); 4761 // Process list of dependencies. 4762 ASTContext &C = CGM.getContext(); 4763 Address DependenciesArray = Address::invalid(); 4764 llvm::Value *NumOfElements = nullptr; 4765 unsigned NumDependencies = std::accumulate( 4766 Dependencies.begin(), Dependencies.end(), 0, 4767 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4768 return D.DepKind == OMPC_DEPEND_depobj 4769 ? V 4770 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4771 }); 4772 QualType FlagsTy; 4773 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4774 bool HasDepobjDeps = false; 4775 bool HasRegularWithIterators = false; 4776 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4777 llvm::Value *NumOfRegularWithIterators = 4778 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4779 // Calculate number of depobj dependecies and regular deps with the iterators. 4780 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4781 if (D.DepKind == OMPC_DEPEND_depobj) { 4782 SmallVector<llvm::Value *, 4> Sizes = 4783 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4784 for (llvm::Value *Size : Sizes) { 4785 NumOfDepobjElements = 4786 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4787 } 4788 HasDepobjDeps = true; 4789 continue; 4790 } 4791 // Include number of iterations, if any. 4792 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4793 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4794 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4795 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4796 NumOfRegularWithIterators = 4797 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4798 } 4799 HasRegularWithIterators = true; 4800 continue; 4801 } 4802 } 4803 4804 QualType KmpDependInfoArrayTy; 4805 if (HasDepobjDeps || HasRegularWithIterators) { 4806 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4807 /*isSigned=*/false); 4808 if (HasDepobjDeps) { 4809 NumOfElements = 4810 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4811 } 4812 if (HasRegularWithIterators) { 4813 NumOfElements = 4814 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4815 } 4816 OpaqueValueExpr OVE(Loc, 4817 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4818 VK_RValue); 4819 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4820 RValue::get(NumOfElements)); 4821 
KmpDependInfoArrayTy = 4822 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4823 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4824 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4825 // Properly emit variable-sized array. 4826 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4827 ImplicitParamDecl::Other); 4828 CGF.EmitVarDecl(*PD); 4829 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4830 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4831 /*isSigned=*/false); 4832 } else { 4833 KmpDependInfoArrayTy = C.getConstantArrayType( 4834 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4835 ArrayType::Normal, /*IndexTypeQuals=*/0); 4836 DependenciesArray = 4837 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4838 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4839 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4840 /*isSigned=*/false); 4841 } 4842 unsigned Pos = 0; 4843 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4844 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4845 Dependencies[I].IteratorExpr) 4846 continue; 4847 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4848 DependenciesArray); 4849 } 4850 // Copy regular dependecies with iterators. 4851 LValue PosLVal = CGF.MakeAddrLValue( 4852 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4853 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4854 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4855 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4856 !Dependencies[I].IteratorExpr) 4857 continue; 4858 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4859 DependenciesArray); 4860 } 4861 // Copy final depobj arrays without iterators. 
4862 if (HasDepobjDeps) { 4863 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4864 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4865 continue; 4866 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4867 DependenciesArray); 4868 } 4869 } 4870 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4871 DependenciesArray, CGF.VoidPtrTy); 4872 return std::make_pair(NumOfElements, DependenciesArray); 4873 } 4874 4875 Address CGOpenMPRuntime::emitDepobjDependClause( 4876 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4877 SourceLocation Loc) { 4878 if (Dependencies.DepExprs.empty()) 4879 return Address::invalid(); 4880 // Process list of dependencies. 4881 ASTContext &C = CGM.getContext(); 4882 Address DependenciesArray = Address::invalid(); 4883 unsigned NumDependencies = Dependencies.DepExprs.size(); 4884 QualType FlagsTy; 4885 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4886 RecordDecl *KmpDependInfoRD = 4887 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4888 4889 llvm::Value *Size; 4890 // Define type kmp_depend_info[<Dependencies.size()>]; 4891 // For depobj reserve one extra element to store the number of elements. 4892 // It is required to handle depobj(x) update(in) construct. 
4893 // kmp_depend_info[<Dependencies.size()>] deps; 4894 llvm::Value *NumDepsVal; 4895 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4896 if (const auto *IE = 4897 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4898 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4899 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4900 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4901 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4902 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4903 } 4904 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4905 NumDepsVal); 4906 CharUnits SizeInBytes = 4907 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4908 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4909 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4910 NumDepsVal = 4911 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4912 } else { 4913 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4914 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4915 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4916 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4917 Size = CGM.getSize(Sz.alignTo(Align)); 4918 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4919 } 4920 // Need to allocate on the dynamic memory. 4921 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4922 // Use default allocator. 
4923 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4924 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4925 4926 llvm::Value *Addr = 4927 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4928 CGM.getModule(), OMPRTL___kmpc_alloc), 4929 Args, ".dep.arr.addr"); 4930 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4931 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4932 DependenciesArray = Address(Addr, Align); 4933 // Write number of elements in the first element of array for depobj. 4934 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4935 // deps[i].base_addr = NumDependencies; 4936 LValue BaseAddrLVal = CGF.EmitLValueForField( 4937 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4938 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4939 llvm::PointerUnion<unsigned *, LValue *> Pos; 4940 unsigned Idx = 1; 4941 LValue PosLVal; 4942 if (Dependencies.IteratorExpr) { 4943 PosLVal = CGF.MakeAddrLValue( 4944 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4945 C.getSizeType()); 4946 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4947 /*IsInit=*/true); 4948 Pos = &PosLVal; 4949 } else { 4950 Pos = &Idx; 4951 } 4952 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4953 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4954 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4955 return DependenciesArray; 4956 } 4957 4958 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4959 SourceLocation Loc) { 4960 ASTContext &C = CGM.getContext(); 4961 QualType FlagsTy; 4962 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4963 LValue Base = CGF.EmitLoadOfPointerLValue( 4964 DepobjLVal.getAddress(CGF), 4965 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4966 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4967 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4968 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4969 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4970 Addr.getPointer(), 4971 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4972 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 4973 CGF.VoidPtrTy); 4974 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4975 // Use default allocator. 4976 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4977 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 4978 4979 // _kmpc_free(gtid, addr, nullptr); 4980 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4981 CGM.getModule(), OMPRTL___kmpc_free), 4982 Args); 4983 } 4984 4985 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 4986 OpenMPDependClauseKind NewDepKind, 4987 SourceLocation Loc) { 4988 ASTContext &C = CGM.getContext(); 4989 QualType FlagsTy; 4990 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4991 RecordDecl *KmpDependInfoRD = 4992 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4993 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4994 llvm::Value *NumDeps; 4995 LValue Base; 4996 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 4997 4998 Address Begin = Base.getAddress(CGF); 4999 // Cast from pointer to array type to pointer to single element. 5000 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5001 // The basic structure here is a while-do loop. 
5002 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5003 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5004 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5005 CGF.EmitBlock(BodyBB); 5006 llvm::PHINode *ElementPHI = 5007 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5008 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5009 Begin = Address(ElementPHI, Begin.getAlignment()); 5010 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5011 Base.getTBAAInfo()); 5012 // deps[i].flags = NewDepKind; 5013 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5014 LValue FlagsLVal = CGF.EmitLValueForField( 5015 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5016 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5017 FlagsLVal); 5018 5019 // Shift the address forward by one element. 5020 Address ElementNext = 5021 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5022 ElementPHI->addIncoming(ElementNext.getPointer(), 5023 CGF.Builder.GetInsertBlock()); 5024 llvm::Value *IsEmpty = 5025 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5026 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5027 // Done. 
5028 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5029 } 5030 5031 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5032 const OMPExecutableDirective &D, 5033 llvm::Function *TaskFunction, 5034 QualType SharedsTy, Address Shareds, 5035 const Expr *IfCond, 5036 const OMPTaskDataTy &Data) { 5037 if (!CGF.HaveInsertPoint()) 5038 return; 5039 5040 TaskResultTy Result = 5041 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5042 llvm::Value *NewTask = Result.NewTask; 5043 llvm::Function *TaskEntry = Result.TaskEntry; 5044 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5045 LValue TDBase = Result.TDBase; 5046 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5047 // Process list of dependences. 5048 Address DependenciesArray = Address::invalid(); 5049 llvm::Value *NumOfElements; 5050 std::tie(NumOfElements, DependenciesArray) = 5051 emitDependClause(CGF, Data.Dependences, Loc); 5052 5053 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5054 // libcall. 
5055 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5056 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5057 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5058 // list is not empty 5059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5060 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5061 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5062 llvm::Value *DepTaskArgs[7]; 5063 if (!Data.Dependences.empty()) { 5064 DepTaskArgs[0] = UpLoc; 5065 DepTaskArgs[1] = ThreadID; 5066 DepTaskArgs[2] = NewTask; 5067 DepTaskArgs[3] = NumOfElements; 5068 DepTaskArgs[4] = DependenciesArray.getPointer(); 5069 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5070 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5071 } 5072 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5073 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5074 if (!Data.Tied) { 5075 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5076 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5077 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5078 } 5079 if (!Data.Dependences.empty()) { 5080 CGF.EmitRuntimeCall( 5081 OMPBuilder.getOrCreateRuntimeFunction( 5082 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5083 DepTaskArgs); 5084 } else { 5085 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5086 CGM.getModule(), OMPRTL___kmpc_omp_task), 5087 TaskArgs); 5088 } 5089 // Check if parent region is untied and build return for untied task; 5090 if (auto *Region = 5091 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5092 Region->emitUntiedSwitch(CGF); 5093 }; 5094 5095 llvm::Value *DepWaitTaskArgs[6]; 5096 if (!Data.Dependences.empty()) { 5097 DepWaitTaskArgs[0] = UpLoc; 5098 DepWaitTaskArgs[1] = ThreadID; 5099 DepWaitTaskArgs[2] = NumOfElements; 5100 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5101 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5102 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5103 } 5104 auto &M = CGM.getModule(); 5105 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5106 TaskEntry, &Data, &DepWaitTaskArgs, 5107 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5108 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5109 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5110 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5111 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5112 // is specified. 5113 if (!Data.Dependences.empty()) 5114 CGF.EmitRuntimeCall( 5115 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5116 DepWaitTaskArgs); 5117 // Call proxy_task_entry(gtid, new_task); 5118 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5119 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5120 Action.Enter(CGF); 5121 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5122 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5123 OutlinedFnArgs); 5124 }; 5125 5126 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5127 // kmp_task_t *new_task); 5128 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5129 // kmp_task_t *new_task); 5130 RegionCodeGenTy RCG(CodeGen); 5131 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5132 M, OMPRTL___kmpc_omp_task_begin_if0), 5133 TaskArgs, 5134 OMPBuilder.getOrCreateRuntimeFunction( 5135 M, OMPRTL___kmpc_omp_task_complete_if0), 5136 TaskArgs); 5137 RCG.setAction(Action); 5138 RCG(CGF); 5139 }; 5140 5141 if (IfCond) { 5142 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5143 } else { 5144 RegionCodeGenTy ThenRCG(ThenCodeGen); 5145 ThenRCG(CGF); 5146 } 5147 } 5148 5149 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5150 const OMPLoopDirective &D, 5151 llvm::Function *TaskFunction, 5152 
QualType SharedsTy, Address Shareds, 5153 const Expr *IfCond, 5154 const OMPTaskDataTy &Data) { 5155 if (!CGF.HaveInsertPoint()) 5156 return; 5157 TaskResultTy Result = 5158 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5159 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5160 // libcall. 5161 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5162 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5163 // sched, kmp_uint64 grainsize, void *task_dup); 5164 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5165 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5166 llvm::Value *IfVal; 5167 if (IfCond) { 5168 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5169 /*isSigned=*/true); 5170 } else { 5171 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5172 } 5173 5174 LValue LBLVal = CGF.EmitLValueForField( 5175 Result.TDBase, 5176 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5177 const auto *LBVar = 5178 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5179 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5180 LBLVal.getQuals(), 5181 /*IsInitializer=*/true); 5182 LValue UBLVal = CGF.EmitLValueForField( 5183 Result.TDBase, 5184 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5185 const auto *UBVar = 5186 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5187 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5188 UBLVal.getQuals(), 5189 /*IsInitializer=*/true); 5190 LValue StLVal = CGF.EmitLValueForField( 5191 Result.TDBase, 5192 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5193 const auto *StVar = 5194 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5195 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5196 StLVal.getQuals(), 5197 /*IsInitializer=*/true); 5198 // 
Store reductions address. 5199 LValue RedLVal = CGF.EmitLValueForField( 5200 Result.TDBase, 5201 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5202 if (Data.Reductions) { 5203 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5204 } else { 5205 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5206 CGF.getContext().VoidPtrTy); 5207 } 5208 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5209 llvm::Value *TaskArgs[] = { 5210 UpLoc, 5211 ThreadID, 5212 Result.NewTask, 5213 IfVal, 5214 LBLVal.getPointer(CGF), 5215 UBLVal.getPointer(CGF), 5216 CGF.EmitLoadOfScalar(StLVal, Loc), 5217 llvm::ConstantInt::getSigned( 5218 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5219 llvm::ConstantInt::getSigned( 5220 CGF.IntTy, Data.Schedule.getPointer() 5221 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5222 : NoSchedule), 5223 Data.Schedule.getPointer() 5224 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5225 /*isSigned=*/false) 5226 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5227 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5228 Result.TaskDupFn, CGF.VoidPtrTy) 5229 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5230 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5231 CGM.getModule(), OMPRTL___kmpc_taskloop), 5232 TaskArgs); 5233 } 5234 5235 /// Emit reduction operation for each element of array (required for 5236 /// array sections) LHS op = RHS. 5237 /// \param Type Type of array. 5238 /// \param LHSVar Variable on the left side of the reduction operation 5239 /// (references element of array in original variable). 5240 /// \param RHSVar Variable on the right side of the reduction operation 5241 /// (references element of array in original variable). 5242 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5243 /// RHSVar. 
5244 static void EmitOMPAggregateReduction( 5245 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5246 const VarDecl *RHSVar, 5247 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5248 const Expr *, const Expr *)> &RedOpGen, 5249 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5250 const Expr *UpExpr = nullptr) { 5251 // Perform element-by-element initialization. 5252 QualType ElementTy; 5253 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5254 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5255 5256 // Drill down to the base element type on both arrays. 5257 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5258 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5259 5260 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5261 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5262 // Cast from pointer to array type to pointer to single element. 5263 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5264 // The basic structure here is a while-do loop. 5265 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5266 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5267 llvm::Value *IsEmpty = 5268 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5269 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5270 5271 // Enter the loop body, making that address the current address. 
5272 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5273 CGF.EmitBlock(BodyBB); 5274 5275 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5276 5277 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5278 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5279 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5280 Address RHSElementCurrent = 5281 Address(RHSElementPHI, 5282 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5283 5284 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5285 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5286 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5287 Address LHSElementCurrent = 5288 Address(LHSElementPHI, 5289 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5290 5291 // Emit copy. 5292 CodeGenFunction::OMPPrivateScope Scope(CGF); 5293 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5294 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5295 Scope.Privatize(); 5296 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5297 Scope.ForceCleanup(); 5298 5299 // Shift the address forward by one element. 5300 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5301 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5302 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5303 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5304 // Check whether we've reached the end. 5305 llvm::Value *Done = 5306 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5307 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5308 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5309 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5310 5311 // Done. 5312 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5313 } 5314 5315 /// Emit reduction combiner. 
/// If the combiner is a simple expression, emit it as is; otherwise treat it
/// as the combiner of a UDR (user-defined reduction) declaration and emit it
/// as a call of the UDR combiner function.
/// \param CGF Function in which the combiner is emitted.
/// \param ReductionOp Reduction operation expression to emit.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value referring to a
  // 'declare reduction' declaration.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Bind the opaque callee to the first function of the pair returned
          // for the UDR, then emit the call while the mapping is active.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Anything else is emitted as an ordinary expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the internal function 'void reduction_func(void *LHSArg, void
/// *RHSArg)' that combines two lists of reduction items.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// The LHS/RHS variables of every reduction are remapped to the corresponding
/// array entries; for variably modified private types the following array
/// entry carries the array size.
///
/// \param Loc Location used for the generated function.
/// \param ArgsType LLVM pointer type both arguments are cast to.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS variables of the reduction operations.
/// \param RHSExprs References to the RHS variables of the reduction operations.
/// \param ReductionOps Reduction operations, one per reduction item.
/// \return The generated function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the pointer arrays; it runs ahead of I because variably
  // modified items occupy an extra slot holding the array size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is read from the next slot of the LHS array, where it is
      // stored as a pointer-sized value.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the reduction operations with the variables remapped to the array
  // entries.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner for a single reduction item: element by element for
/// items of array type, directly otherwise.
/// \param CGF Function in which the combiner is emitted.
/// \param ReductionOp Reduction operation to emit.
/// \param PrivateRef Reference to the private copy; its type selects between
/// the array and the scalar form.
/// \param LHS Reference to the LHS variable of the reduction operation.
/// \param RHS Reference to the RHS variable of the reduction operation.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Only the combiners are emitted; no runtime calls are involved.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I because variably modified
  // items take an extra slot for the array size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored in the next slot, converted to a pointer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Emits the plain (non-atomic) combiners for all reduction items.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only an exit action is supplied (the enter callee is null): the
  // __kmpc_end_reduce{_nowait} call.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE(review): the 'BO' declared in the condition below shadows the
      // BinaryOperatorKind 'BO' above inside this if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
5659 RHSExpr = ACO->getCond(); 5660 } 5661 if (const auto *BORHS = 5662 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5663 EExpr = BORHS->getRHS(); 5664 BO = BORHS->getOpcode(); 5665 } 5666 } 5667 if (XExpr) { 5668 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5669 auto &&AtomicRedGen = [BO, VD, 5670 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5671 const Expr *EExpr, const Expr *UpExpr) { 5672 LValue X = CGF.EmitLValue(XExpr); 5673 RValue E; 5674 if (EExpr) 5675 E = CGF.EmitAnyExpr(EExpr); 5676 CGF.EmitOMPAtomicSimpleUpdateExpr( 5677 X, E, BO, /*IsXLHSInRHSPart=*/true, 5678 llvm::AtomicOrdering::Monotonic, Loc, 5679 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5680 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5681 PrivateScope.addPrivate( 5682 VD, [&CGF, VD, XRValue, Loc]() { 5683 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5684 CGF.emitOMPSimpleStore( 5685 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5686 VD->getType().getNonReferenceType(), Loc); 5687 return LHSTemp; 5688 }); 5689 (void)PrivateScope.Privatize(); 5690 return CGF.EmitAnyExpr(UpExpr); 5691 }); 5692 }; 5693 if ((*IPriv)->getType()->isArrayType()) { 5694 // Emit atomic reduction for array section. 5695 const auto *RHSVar = 5696 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5697 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5698 AtomicRedGen, XExpr, EExpr, UpExpr); 5699 } else { 5700 // Emit atomic reduction for array subscript or single variable. 5701 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5702 } 5703 } else { 5704 // Emit as a critical region. 
5705 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5706 const Expr *, const Expr *) { 5707 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5708 std::string Name = RT.getName({"atomic_reduction"}); 5709 RT.emitCriticalRegion( 5710 CGF, Name, 5711 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5712 Action.Enter(CGF); 5713 emitReductionCombiner(CGF, E); 5714 }, 5715 Loc); 5716 }; 5717 if ((*IPriv)->getType()->isArrayType()) { 5718 const auto *LHSVar = 5719 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5720 const auto *RHSVar = 5721 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5722 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5723 CritRedGen); 5724 } else { 5725 CritRedGen(CGF, nullptr, nullptr, nullptr); 5726 } 5727 } 5728 ++ILHS; 5729 ++IRHS; 5730 ++IPriv; 5731 } 5732 }; 5733 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5734 if (!WithNowait) { 5735 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5736 llvm::Value *EndArgs[] = { 5737 IdentTLoc, // ident_t *<loc> 5738 ThreadId, // i32 <gtid> 5739 Lock // kmp_critical_name *&<lock> 5740 }; 5741 CommonActionTy Action(nullptr, llvm::None, 5742 OMPBuilder.getOrCreateRuntimeFunction( 5743 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5744 EndArgs); 5745 AtomicRCG.setAction(Action); 5746 AtomicRCG(CGF); 5747 } else { 5748 AtomicRCG(CGF); 5749 } 5750 5751 CGF.EmitBranch(DefaultBB); 5752 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5753 } 5754 5755 /// Generates unique name for artificial threadprivate variables. 5756 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5757 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5758 const Expr *Ref) { 5759 SmallString<256> Buffer; 5760 llvm::raw_svector_ostream Out(Buffer); 5761 const clang::DeclRefExpr *DE; 5762 const VarDecl *D = ::getBaseDecl(Ref, DE); 5763 if (!D) 5764 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5765 D = D->getCanonicalDecl(); 5766 std::string Name = CGM.getOpenMPRuntime().getName( 5767 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5768 Out << Prefix << Name << "_" 5769 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5770 return std::string(Out.str()); 5771 } 5772 5773 /// Emits reduction initializer function: 5774 /// \code 5775 /// void @.red_init(void* %arg, void* %orig) { 5776 /// %0 = bitcast void* %arg to <type>* 5777 /// store <type> <init>, <type>* %0 5778 /// ret void 5779 /// } 5780 /// \endcode 5781 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5782 SourceLocation Loc, 5783 ReductionCodeGen &RCG, unsigned N) { 5784 ASTContext &C = CGM.getContext(); 5785 QualType VoidPtrTy = C.VoidPtrTy; 5786 VoidPtrTy.addRestrict(); 5787 FunctionArgList Args; 5788 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5789 ImplicitParamDecl::Other); 5790 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5791 ImplicitParamDecl::Other); 5792 Args.emplace_back(&Param); 5793 Args.emplace_back(&ParamOrig); 5794 const auto &FnInfo = 5795 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5796 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5797 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5798 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5799 Name, &CGM.getModule()); 5800 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5801 Fn->setDoesNotRecurse(); 5802 CodeGenFunction CGF(CGM); 5803 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5804 Address PrivateAddr = CGF.EmitLoadOfPointer( 5805 CGF.GetAddrOfLocalVar(&Param), 5806 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5807 llvm::Value *Size = nullptr; 5808 // If the size of the reduction item is non-constant, load it from global 5809 // threadprivate variable. 5810 if (RCG.getSizes(N).second) { 5811 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5812 CGF, CGM.getContext().getSizeType(), 5813 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5814 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5815 CGM.getContext().getSizeType(), Loc); 5816 } 5817 RCG.emitAggregateType(CGF, N, Size); 5818 LValue OrigLVal; 5819 // If initializer uses initializer from declare reduction construct, emit a 5820 // pointer to the address of the original reduction item (reuired by reduction 5821 // initializer) 5822 if (RCG.usesReductionInitializer(N)) { 5823 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5824 SharedAddr = CGF.EmitLoadOfPointer( 5825 SharedAddr, 5826 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5827 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5828 } else { 5829 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5830 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5831 CGM.getContext().VoidPtrTy); 5832 } 5833 // Emit the initializer: 5834 // %0 = bitcast void* %arg to <type>* 5835 // store <type> <init>, <type>* %0 5836 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5837 [](CodeGenFunction &) { return false; }); 5838 CGF.FinishFunction(); 5839 return Fn; 5840 } 5841 5842 /// Emits reduction combiner function: 5843 /// \code 5844 /// void @.red_comb(void* %arg0, void* %arg1) { 5845 /// %lhs = bitcast void* %arg0 to <type>* 5846 /// %rhs = bitcast void* %arg1 to <type>* 5847 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5848 /// store <type> %2, <type>* %lhs 5849 /// 
ret void 5850 /// } 5851 /// \endcode 5852 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5853 SourceLocation Loc, 5854 ReductionCodeGen &RCG, unsigned N, 5855 const Expr *ReductionOp, 5856 const Expr *LHS, const Expr *RHS, 5857 const Expr *PrivateRef) { 5858 ASTContext &C = CGM.getContext(); 5859 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5860 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5861 FunctionArgList Args; 5862 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5863 C.VoidPtrTy, ImplicitParamDecl::Other); 5864 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5865 ImplicitParamDecl::Other); 5866 Args.emplace_back(&ParamInOut); 5867 Args.emplace_back(&ParamIn); 5868 const auto &FnInfo = 5869 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5870 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5871 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5872 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5873 Name, &CGM.getModule()); 5874 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5875 Fn->setDoesNotRecurse(); 5876 CodeGenFunction CGF(CGM); 5877 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5878 llvm::Value *Size = nullptr; 5879 // If the size of the reduction item is non-constant, load it from global 5880 // threadprivate variable. 5881 if (RCG.getSizes(N).second) { 5882 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5883 CGF, CGM.getContext().getSizeType(), 5884 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5885 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5886 CGM.getContext().getSizeType(), Loc); 5887 } 5888 RCG.emitAggregateType(CGF, N, Size); 5889 // Remap lhs and rhs variables to the addresses of the function arguments. 
5890 // %lhs = bitcast void* %arg0 to <type>* 5891 // %rhs = bitcast void* %arg1 to <type>* 5892 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5893 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5894 // Pull out the pointer to the variable. 5895 Address PtrAddr = CGF.EmitLoadOfPointer( 5896 CGF.GetAddrOfLocalVar(&ParamInOut), 5897 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5898 return CGF.Builder.CreateElementBitCast( 5899 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5900 }); 5901 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5902 // Pull out the pointer to the variable. 5903 Address PtrAddr = CGF.EmitLoadOfPointer( 5904 CGF.GetAddrOfLocalVar(&ParamIn), 5905 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5906 return CGF.Builder.CreateElementBitCast( 5907 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5908 }); 5909 PrivateScope.Privatize(); 5910 // Emit the combiner body: 5911 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5912 // store <type> %2, <type>* %lhs 5913 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5914 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5915 cast<DeclRefExpr>(RHS)); 5916 CGF.FinishFunction(); 5917 return Fn; 5918 } 5919 5920 /// Emits reduction finalizer function: 5921 /// \code 5922 /// void @.red_fini(void* %arg) { 5923 /// %0 = bitcast void* %arg to <type>* 5924 /// <destroy>(<type>* %0) 5925 /// ret void 5926 /// } 5927 /// \endcode 5928 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5929 SourceLocation Loc, 5930 ReductionCodeGen &RCG, unsigned N) { 5931 if (!RCG.needCleanups(N)) 5932 return nullptr; 5933 ASTContext &C = CGM.getContext(); 5934 FunctionArgList Args; 5935 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5936 ImplicitParamDecl::Other); 5937 Args.emplace_back(&Param); 5938 const auto &FnInfo = 5939 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5940 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5941 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5942 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5943 Name, &CGM.getModule()); 5944 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5945 Fn->setDoesNotRecurse(); 5946 CodeGenFunction CGF(CGM); 5947 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5948 Address PrivateAddr = CGF.EmitLoadOfPointer( 5949 CGF.GetAddrOfLocalVar(&Param), 5950 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5951 llvm::Value *Size = nullptr; 5952 // If the size of the reduction item is non-constant, load it from global 5953 // threadprivate variable. 5954 if (RCG.getSizes(N).second) { 5955 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5956 CGF, CGM.getContext().getSizeType(), 5957 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5958 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5959 CGM.getContext().getSizeType(), Loc); 5960 } 5961 RCG.emitAggregateType(CGF, N, Size); 5962 // Emit the finalizer body: 5963 // <destroy>(<type>* %0) 5964 RCG.emitCleanups(CGF, N, PrivateAddr); 5965 CGF.FinishFunction(Loc); 5966 return Fn; 5967 } 5968 5969 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5970 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5971 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5972 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5973 return nullptr; 5974 5975 // Build typedef struct: 5976 // kmp_taskred_input { 5977 // void *reduce_shar; // shared reduction item 5978 // void *reduce_orig; // original reduction item used for initialization 5979 // size_t reduce_size; // size of data item 5980 // void *reduce_init; // data initialization routine 5981 // void *reduce_fini; // data finalization routine 5982 // void *reduce_comb; // data combiner routine 5983 // kmp_task_red_flags_t flags; // 
  //     flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The implicit record below mirrors that layout field by field; the flags
  // field is a 32-bit unsigned integer.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One record per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // Must run before getSharedLValue()/getOrigLValue() are queried below.
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    // getSizes() yields a pair; the second value is non-null only when the
    // size of the item is not a compile-time constant.
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because the runtime does not provide a way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead,
    // threadprivate global variables are used to store these values and the
    // functions read them from there.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when no finalizer was generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the runtime call that finishes a task reduction started with the
/// modifier form of the init call.
/// \param IsWorksharingReduction Passed to the runtime as the is_ws flag
/// (1 or 0).
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int
  // is_ws); the result of the call is discarded.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6123 if (Sizes.second) { 6124 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6125 /*isSigned=*/false); 6126 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6127 CGF, CGM.getContext().getSizeType(), 6128 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6129 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6130 } 6131 } 6132 6133 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6134 SourceLocation Loc, 6135 llvm::Value *ReductionsPtr, 6136 LValue SharedLVal) { 6137 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6138 // *d); 6139 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6140 CGM.IntTy, 6141 /*isSigned=*/true), 6142 ReductionsPtr, 6143 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6144 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6145 return Address( 6146 CGF.EmitRuntimeCall( 6147 OMPBuilder.getOrCreateRuntimeFunction( 6148 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6149 Args), 6150 SharedLVal.getAlignment()); 6151 } 6152 6153 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6154 SourceLocation Loc) { 6155 if (!CGF.HaveInsertPoint()) 6156 return; 6157 6158 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6159 OMPBuilder.CreateTaskwait(CGF.Builder); 6160 } else { 6161 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6162 // global_tid); 6163 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6164 // Ignore return result until untied tasks are supported. 
6165 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6166 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6167 Args); 6168 } 6169 6170 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6171 Region->emitUntiedSwitch(CGF); 6172 } 6173 6174 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6175 OpenMPDirectiveKind InnerKind, 6176 const RegionCodeGenTy &CodeGen, 6177 bool HasCancel) { 6178 if (!CGF.HaveInsertPoint()) 6179 return; 6180 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6181 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6182 } 6183 6184 namespace { 6185 enum RTCancelKind { 6186 CancelNoreq = 0, 6187 CancelParallel = 1, 6188 CancelLoop = 2, 6189 CancelSections = 3, 6190 CancelTaskgroup = 4 6191 }; 6192 } // anonymous namespace 6193 6194 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6195 RTCancelKind CancelKind = CancelNoreq; 6196 if (CancelRegion == OMPD_parallel) 6197 CancelKind = CancelParallel; 6198 else if (CancelRegion == OMPD_for) 6199 CancelKind = CancelLoop; 6200 else if (CancelRegion == OMPD_sections) 6201 CancelKind = CancelSections; 6202 else { 6203 assert(CancelRegion == OMPD_taskgroup); 6204 CancelKind = CancelTaskgroup; 6205 } 6206 return CancelKind; 6207 } 6208 6209 void CGOpenMPRuntime::emitCancellationPointCall( 6210 CodeGenFunction &CGF, SourceLocation Loc, 6211 OpenMPDirectiveKind CancelRegion) { 6212 if (!CGF.HaveInsertPoint()) 6213 return; 6214 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6215 // global_tid, kmp_int32 cncl_kind); 6216 if (auto *OMPRegionInfo = 6217 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6218 // For 'cancellation point taskgroup', the task region info may not have a 6219 // cancel. This may instead happen in another adjacent task. 
6220 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6221 llvm::Value *Args[] = { 6222 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6223 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6224 // Ignore return result until untied tasks are supported. 6225 llvm::Value *Result = CGF.EmitRuntimeCall( 6226 OMPBuilder.getOrCreateRuntimeFunction( 6227 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6228 Args); 6229 // if (__kmpc_cancellationpoint()) { 6230 // exit from construct; 6231 // } 6232 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6233 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6234 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6235 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6236 CGF.EmitBlock(ExitBB); 6237 // exit from construct; 6238 CodeGenFunction::JumpDest CancelDest = 6239 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6240 CGF.EmitBranchThroughCleanup(CancelDest); 6241 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6242 } 6243 } 6244 } 6245 6246 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6247 const Expr *IfCond, 6248 OpenMPDirectiveKind CancelRegion) { 6249 if (!CGF.HaveInsertPoint()) 6250 return; 6251 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6252 // kmp_int32 cncl_kind); 6253 auto &M = CGM.getModule(); 6254 if (auto *OMPRegionInfo = 6255 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6256 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6257 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6258 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6259 llvm::Value *Args[] = { 6260 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6261 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6262 // Ignore return result until untied tasks are supported. 
6263 llvm::Value *Result = CGF.EmitRuntimeCall( 6264 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6265 // if (__kmpc_cancel()) { 6266 // exit from construct; 6267 // } 6268 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6269 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6270 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6271 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6272 CGF.EmitBlock(ExitBB); 6273 // exit from construct; 6274 CodeGenFunction::JumpDest CancelDest = 6275 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6276 CGF.EmitBranchThroughCleanup(CancelDest); 6277 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6278 }; 6279 if (IfCond) { 6280 emitIfClause(CGF, IfCond, ThenGen, 6281 [](CodeGenFunction &, PrePostActionTy &) {}); 6282 } else { 6283 RegionCodeGenTy ThenRCG(ThenGen); 6284 ThenRCG(CGF); 6285 } 6286 } 6287 } 6288 6289 namespace { 6290 /// Cleanup action for uses_allocators support. 
6291 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6292 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6293 6294 public: 6295 OMPUsesAllocatorsActionTy( 6296 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6297 : Allocators(Allocators) {} 6298 void Enter(CodeGenFunction &CGF) override { 6299 if (!CGF.HaveInsertPoint()) 6300 return; 6301 for (const auto &AllocatorData : Allocators) { 6302 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6303 CGF, AllocatorData.first, AllocatorData.second); 6304 } 6305 } 6306 void Exit(CodeGenFunction &CGF) override { 6307 if (!CGF.HaveInsertPoint()) 6308 return; 6309 for (const auto &AllocatorData : Allocators) { 6310 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6311 AllocatorData.first); 6312 } 6313 } 6314 }; 6315 } // namespace 6316 6317 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6318 const OMPExecutableDirective &D, StringRef ParentName, 6319 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6320 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6321 assert(!ParentName.empty() && "Invalid target region parent name!"); 6322 HasEmittedTargetRegion = true; 6323 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6324 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6325 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6326 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6327 if (!D.AllocatorTraits) 6328 continue; 6329 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6330 } 6331 } 6332 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6333 CodeGen.setAction(UsesAllocatorAction); 6334 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6335 IsOffloadEntry, CodeGen); 6336 } 6337 6338 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6339 const Expr *Allocator, 6340 const Expr *AllocatorTraits) { 6341 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6342 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6343 // Use default memspace handle. 6344 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6345 llvm::Value *NumTraits = llvm::ConstantInt::get( 6346 CGF.IntTy, cast<ConstantArrayType>( 6347 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6348 ->getSize() 6349 .getLimitedValue()); 6350 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6351 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6352 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6353 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6354 AllocatorTraitsLVal.getBaseInfo(), 6355 AllocatorTraitsLVal.getTBAAInfo()); 6356 llvm::Value *Traits = 6357 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6358 6359 llvm::Value *AllocatorVal = 6360 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6361 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6362 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6363 // Store to allocator. 
6364 CGF.EmitVarDecl(*cast<VarDecl>( 6365 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6366 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6367 AllocatorVal = 6368 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6369 Allocator->getType(), Allocator->getExprLoc()); 6370 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6371 } 6372 6373 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6374 const Expr *Allocator) { 6375 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6376 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6377 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6378 llvm::Value *AllocatorVal = 6379 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6380 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6381 CGF.getContext().VoidPtrTy, 6382 Allocator->getExprLoc()); 6383 (void)CGF.EmitRuntimeCall( 6384 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6385 OMPRTL___kmpc_destroy_allocator), 6386 {ThreadId, AllocatorVal}); 6387 } 6388 6389 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6390 const OMPExecutableDirective &D, StringRef ParentName, 6391 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6392 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6393 // Create a unique name for the entry function using the source location 6394 // information of the current target region. The name will be something like: 6395 // 6396 // __omp_offloading_DD_FFFF_PP_lBB 6397 // 6398 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6399 // mangled name of the function that encloses the target region and BB is the 6400 // line number of the target region. 
6401 6402 unsigned DeviceID; 6403 unsigned FileID; 6404 unsigned Line; 6405 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6406 Line); 6407 SmallString<64> EntryFnName; 6408 { 6409 llvm::raw_svector_ostream OS(EntryFnName); 6410 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6411 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6412 } 6413 6414 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6415 6416 CodeGenFunction CGF(CGM, true); 6417 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6418 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6419 6420 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6421 6422 // If this target outline function is not an offload entry, we don't need to 6423 // register it. 6424 if (!IsOffloadEntry) 6425 return; 6426 6427 // The target region ID is used by the runtime library to identify the current 6428 // target region, so it only has to be unique and not necessarily point to 6429 // anything. It could be the pointer to the outlined function that implements 6430 // the target region, but we aren't using that so that the compiler doesn't 6431 // need to keep that, and could therefore inline the host function if proven 6432 // worthwhile during optimization. In the other hand, if emitting code for the 6433 // device, the ID has to be the function address so that it can retrieved from 6434 // the offloading entry and launched by the runtime library. We also mark the 6435 // outlined function to have external linkage in case we are emitting code for 6436 // the device, because these functions will be entry points to the device. 
6437 6438 if (CGM.getLangOpts().OpenMPIsDevice) { 6439 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6440 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6441 OutlinedFn->setDSOLocal(false); 6442 } else { 6443 std::string Name = getName({EntryFnName, "region_id"}); 6444 OutlinedFnID = new llvm::GlobalVariable( 6445 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6446 llvm::GlobalValue::WeakAnyLinkage, 6447 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6448 } 6449 6450 // Register the information for the entry associated with this target region. 6451 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6452 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6453 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6454 } 6455 6456 /// Checks if the expression is constant or does not have non-trivial function 6457 /// calls. 6458 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6459 // We can skip constant expressions. 6460 // We can skip expressions with trivial calls or simple expressions. 6461 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6462 !E->hasNonTrivialCall(Ctx)) && 6463 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6464 } 6465 6466 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6467 const Stmt *Body) { 6468 const Stmt *Child = Body->IgnoreContainers(); 6469 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6470 Child = nullptr; 6471 for (const Stmt *S : C->body()) { 6472 if (const auto *E = dyn_cast<Expr>(S)) { 6473 if (isTrivial(Ctx, E)) 6474 continue; 6475 } 6476 // Some of the statements can be ignored. 6477 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6478 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6479 continue; 6480 // Analyze declarations. 
6481 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6482 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6483 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6484 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6485 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6486 isa<UsingDirectiveDecl>(D) || 6487 isa<OMPDeclareReductionDecl>(D) || 6488 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6489 return true; 6490 const auto *VD = dyn_cast<VarDecl>(D); 6491 if (!VD) 6492 return false; 6493 return VD->isConstexpr() || 6494 ((VD->getType().isTrivialType(Ctx) || 6495 VD->getType()->isReferenceType()) && 6496 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6497 })) 6498 continue; 6499 } 6500 // Found multiple children - cannot get the one child only. 6501 if (Child) 6502 return nullptr; 6503 Child = S; 6504 } 6505 if (Child) 6506 Child = Child->IgnoreContainers(); 6507 } 6508 return Child; 6509 } 6510 6511 /// Emit the number of teams for a target directive. Inspect the num_teams 6512 /// clause associated with a teams construct combined or closely nested 6513 /// with the target directive. 6514 /// 6515 /// Emit a team of size one for directives such as 'target parallel' that 6516 /// have no associated teams construct. 6517 /// 6518 /// Otherwise, return nullptr. 
6519 static llvm::Value * 6520 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6521 const OMPExecutableDirective &D) { 6522 assert(!CGF.getLangOpts().OpenMPIsDevice && 6523 "Clauses associated with the teams directive expected to be emitted " 6524 "only for the host!"); 6525 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6526 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6527 "Expected target-based executable directive."); 6528 CGBuilderTy &Bld = CGF.Builder; 6529 switch (DirectiveKind) { 6530 case OMPD_target: { 6531 const auto *CS = D.getInnermostCapturedStmt(); 6532 const auto *Body = 6533 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6534 const Stmt *ChildStmt = 6535 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6536 if (const auto *NestedDir = 6537 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6538 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6539 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6540 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6541 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6542 const Expr *NumTeams = 6543 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6544 llvm::Value *NumTeamsVal = 6545 CGF.EmitScalarExpr(NumTeams, 6546 /*IgnoreResultAssign*/ true); 6547 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6548 /*isSigned=*/true); 6549 } 6550 return Bld.getInt32(0); 6551 } 6552 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6553 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6554 return Bld.getInt32(1); 6555 return Bld.getInt32(0); 6556 } 6557 return nullptr; 6558 } 6559 case OMPD_target_teams: 6560 case OMPD_target_teams_distribute: 6561 case OMPD_target_teams_distribute_simd: 6562 case OMPD_target_teams_distribute_parallel_for: 6563 case OMPD_target_teams_distribute_parallel_for_simd: { 6564 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6565 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6566 const Expr *NumTeams = 6567 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6568 llvm::Value *NumTeamsVal = 6569 CGF.EmitScalarExpr(NumTeams, 6570 /*IgnoreResultAssign*/ true); 6571 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6572 /*isSigned=*/true); 6573 } 6574 return Bld.getInt32(0); 6575 } 6576 case OMPD_target_parallel: 6577 case OMPD_target_parallel_for: 6578 case OMPD_target_parallel_for_simd: 6579 case OMPD_target_simd: 6580 return Bld.getInt32(1); 6581 case OMPD_parallel: 6582 case OMPD_for: 6583 case OMPD_parallel_for: 6584 case OMPD_parallel_master: 6585 case OMPD_parallel_sections: 6586 case OMPD_for_simd: 6587 case OMPD_parallel_for_simd: 6588 case OMPD_cancel: 6589 case OMPD_cancellation_point: 6590 case OMPD_ordered: 6591 case OMPD_threadprivate: 6592 case OMPD_allocate: 6593 case OMPD_task: 6594 case OMPD_simd: 6595 case OMPD_sections: 6596 case OMPD_section: 6597 case OMPD_single: 6598 case OMPD_master: 6599 case OMPD_critical: 6600 case OMPD_taskyield: 6601 case OMPD_barrier: 6602 case OMPD_taskwait: 6603 case OMPD_taskgroup: 6604 case OMPD_atomic: 6605 case OMPD_flush: 6606 case OMPD_depobj: 6607 case OMPD_scan: 6608 case OMPD_teams: 6609 case OMPD_target_data: 6610 case OMPD_target_exit_data: 6611 case OMPD_target_enter_data: 6612 case OMPD_distribute: 6613 case OMPD_distribute_simd: 6614 case OMPD_distribute_parallel_for: 6615 case OMPD_distribute_parallel_for_simd: 6616 case OMPD_teams_distribute: 6617 case OMPD_teams_distribute_simd: 6618 case OMPD_teams_distribute_parallel_for: 6619 case OMPD_teams_distribute_parallel_for_simd: 6620 case OMPD_target_update: 6621 case OMPD_declare_simd: 6622 case OMPD_declare_variant: 6623 case OMPD_begin_declare_variant: 6624 case OMPD_end_declare_variant: 6625 case OMPD_declare_target: 6626 case OMPD_end_declare_target: 6627 case OMPD_declare_reduction: 6628 case OMPD_declare_mapper: 6629 case OMPD_taskloop: 6630 case OMPD_taskloop_simd: 6631 case OMPD_master_taskloop: 
6632 case OMPD_master_taskloop_simd: 6633 case OMPD_parallel_master_taskloop: 6634 case OMPD_parallel_master_taskloop_simd: 6635 case OMPD_requires: 6636 case OMPD_unknown: 6637 break; 6638 default: 6639 break; 6640 } 6641 llvm_unreachable("Unexpected directive kind."); 6642 } 6643 6644 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6645 llvm::Value *DefaultThreadLimitVal) { 6646 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6647 CGF.getContext(), CS->getCapturedStmt()); 6648 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6649 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6650 llvm::Value *NumThreads = nullptr; 6651 llvm::Value *CondVal = nullptr; 6652 // Handle if clause. If if clause present, the number of threads is 6653 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6654 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6655 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6656 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6657 const OMPIfClause *IfClause = nullptr; 6658 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6659 if (C->getNameModifier() == OMPD_unknown || 6660 C->getNameModifier() == OMPD_parallel) { 6661 IfClause = C; 6662 break; 6663 } 6664 } 6665 if (IfClause) { 6666 const Expr *Cond = IfClause->getCondition(); 6667 bool Result; 6668 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6669 if (!Result) 6670 return CGF.Builder.getInt32(1); 6671 } else { 6672 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6673 if (const auto *PreInit = 6674 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6675 for (const auto *I : PreInit->decls()) { 6676 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6677 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6678 } else { 6679 CodeGenFunction::AutoVarEmission Emission = 6680 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6681 CGF.EmitAutoVarCleanups(Emission); 6682 } 6683 } 6684 } 6685 
CondVal = CGF.EvaluateExprAsBool(Cond); 6686 } 6687 } 6688 } 6689 // Check the value of num_threads clause iff if clause was not specified 6690 // or is not evaluated to false. 6691 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6692 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6693 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6694 const auto *NumThreadsClause = 6695 Dir->getSingleClause<OMPNumThreadsClause>(); 6696 CodeGenFunction::LexicalScope Scope( 6697 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6698 if (const auto *PreInit = 6699 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6700 for (const auto *I : PreInit->decls()) { 6701 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6702 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6703 } else { 6704 CodeGenFunction::AutoVarEmission Emission = 6705 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6706 CGF.EmitAutoVarCleanups(Emission); 6707 } 6708 } 6709 } 6710 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6711 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6712 /*isSigned=*/false); 6713 if (DefaultThreadLimitVal) 6714 NumThreads = CGF.Builder.CreateSelect( 6715 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6716 DefaultThreadLimitVal, NumThreads); 6717 } else { 6718 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6719 : CGF.Builder.getInt32(0); 6720 } 6721 // Process condition of the if clause. 6722 if (CondVal) { 6723 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6724 CGF.Builder.getInt32(1)); 6725 } 6726 return NumThreads; 6727 } 6728 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6729 return CGF.Builder.getInt32(1); 6730 return DefaultThreadLimitVal; 6731 } 6732 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6733 : CGF.Builder.getInt32(0); 6734 } 6735 6736 /// Emit the number of threads for a target directive. 
Inspect the 6737 /// thread_limit clause associated with a teams construct combined or closely 6738 /// nested with the target directive. 6739 /// 6740 /// Emit the num_threads clause for directives such as 'target parallel' that 6741 /// have no associated teams construct. 6742 /// 6743 /// Otherwise, return nullptr. 6744 static llvm::Value * 6745 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6746 const OMPExecutableDirective &D) { 6747 assert(!CGF.getLangOpts().OpenMPIsDevice && 6748 "Clauses associated with the teams directive expected to be emitted " 6749 "only for the host!"); 6750 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6751 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6752 "Expected target-based executable directive."); 6753 CGBuilderTy &Bld = CGF.Builder; 6754 llvm::Value *ThreadLimitVal = nullptr; 6755 llvm::Value *NumThreadsVal = nullptr; 6756 switch (DirectiveKind) { 6757 case OMPD_target: { 6758 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6759 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6760 return NumThreads; 6761 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6762 CGF.getContext(), CS->getCapturedStmt()); 6763 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6764 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6765 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6766 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6767 const auto *ThreadLimitClause = 6768 Dir->getSingleClause<OMPThreadLimitClause>(); 6769 CodeGenFunction::LexicalScope Scope( 6770 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6771 if (const auto *PreInit = 6772 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6773 for (const auto *I : PreInit->decls()) { 6774 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6775 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6776 } else { 6777 CodeGenFunction::AutoVarEmission Emission = 6778 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6779 CGF.EmitAutoVarCleanups(Emission); 6780 } 6781 } 6782 } 6783 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6784 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6785 ThreadLimitVal = 6786 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6787 } 6788 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6789 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6790 CS = Dir->getInnermostCapturedStmt(); 6791 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6792 CGF.getContext(), CS->getCapturedStmt()); 6793 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6794 } 6795 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6796 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6797 CS = Dir->getInnermostCapturedStmt(); 6798 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6799 return NumThreads; 6800 } 6801 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6802 return Bld.getInt32(1); 6803 } 6804 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6805 } 6806 case OMPD_target_teams: { 6807 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6808 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6809 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6810 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6811 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6812 ThreadLimitVal = 6813 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6814 } 6815 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6816 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6817 return NumThreads; 6818 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6819 CGF.getContext(), CS->getCapturedStmt()); 6820 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6821 if (Dir->getDirectiveKind() == OMPD_distribute) { 6822 CS = Dir->getInnermostCapturedStmt(); 6823 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6824 return NumThreads; 6825 } 6826 } 6827 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6828 } 6829 case OMPD_target_teams_distribute: 6830 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6831 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6832 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6833 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6834 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6835 ThreadLimitVal = 6836 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6837 } 6838 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6839 case OMPD_target_parallel: 6840 case OMPD_target_parallel_for: 6841 case OMPD_target_parallel_for_simd: 6842 case OMPD_target_teams_distribute_parallel_for: 6843 case OMPD_target_teams_distribute_parallel_for_simd: { 6844 llvm::Value *CondVal = nullptr; 6845 // Handle if clause. 
If if clause present, the number of threads is 6846 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6847 if (D.hasClausesOfKind<OMPIfClause>()) { 6848 const OMPIfClause *IfClause = nullptr; 6849 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6850 if (C->getNameModifier() == OMPD_unknown || 6851 C->getNameModifier() == OMPD_parallel) { 6852 IfClause = C; 6853 break; 6854 } 6855 } 6856 if (IfClause) { 6857 const Expr *Cond = IfClause->getCondition(); 6858 bool Result; 6859 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6860 if (!Result) 6861 return Bld.getInt32(1); 6862 } else { 6863 CodeGenFunction::RunCleanupsScope Scope(CGF); 6864 CondVal = CGF.EvaluateExprAsBool(Cond); 6865 } 6866 } 6867 } 6868 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6869 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6870 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6871 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6872 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6873 ThreadLimitVal = 6874 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6875 } 6876 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6877 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6878 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6879 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6880 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6881 NumThreadsVal = 6882 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6883 ThreadLimitVal = ThreadLimitVal 6884 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6885 ThreadLimitVal), 6886 NumThreadsVal, ThreadLimitVal) 6887 : NumThreadsVal; 6888 } 6889 if (!ThreadLimitVal) 6890 ThreadLimitVal = Bld.getInt32(0); 6891 if (CondVal) 6892 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6893 return ThreadLimitVal; 6894 } 6895 case OMPD_target_teams_distribute_simd: 6896 case OMPD_target_simd: 6897 return Bld.getInt32(1); 6898 case OMPD_parallel: 6899 case OMPD_for: 6900 case OMPD_parallel_for: 6901 case OMPD_parallel_master: 6902 case OMPD_parallel_sections: 6903 case OMPD_for_simd: 6904 case OMPD_parallel_for_simd: 6905 case OMPD_cancel: 6906 case OMPD_cancellation_point: 6907 case OMPD_ordered: 6908 case OMPD_threadprivate: 6909 case OMPD_allocate: 6910 case OMPD_task: 6911 case OMPD_simd: 6912 case OMPD_sections: 6913 case OMPD_section: 6914 case OMPD_single: 6915 case OMPD_master: 6916 case OMPD_critical: 6917 case OMPD_taskyield: 6918 case OMPD_barrier: 6919 case OMPD_taskwait: 6920 case OMPD_taskgroup: 6921 case OMPD_atomic: 6922 case OMPD_flush: 6923 case OMPD_depobj: 6924 case OMPD_scan: 6925 case OMPD_teams: 6926 case OMPD_target_data: 6927 case OMPD_target_exit_data: 6928 case OMPD_target_enter_data: 6929 case OMPD_distribute: 6930 case OMPD_distribute_simd: 6931 case OMPD_distribute_parallel_for: 6932 case OMPD_distribute_parallel_for_simd: 6933 case OMPD_teams_distribute: 6934 case OMPD_teams_distribute_simd: 6935 case OMPD_teams_distribute_parallel_for: 6936 case OMPD_teams_distribute_parallel_for_simd: 6937 case OMPD_target_update: 6938 case OMPD_declare_simd: 6939 case OMPD_declare_variant: 6940 case OMPD_begin_declare_variant: 6941 case OMPD_end_declare_variant: 6942 case OMPD_declare_target: 6943 case OMPD_end_declare_target: 6944 case OMPD_declare_reduction: 6945 case OMPD_declare_mapper: 6946 case OMPD_taskloop: 6947 case OMPD_taskloop_simd: 6948 case OMPD_master_taskloop: 6949 case OMPD_master_taskloop_simd: 6950 case 
OMPD_parallel_master_taskloop: 6951 case OMPD_parallel_master_taskloop_simd: 6952 case OMPD_requires: 6953 case OMPD_unknown: 6954 break; 6955 default: 6956 break; 6957 } 6958 llvm_unreachable("Unsupported directive kind."); 6959 } 6960 6961 namespace { 6962 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6963 6964 // Utility to handle information from clauses associated with a given 6965 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6966 // It provides a convenient interface to obtain the information and generate 6967 // code for that information. 6968 class MappableExprsHandler { 6969 public: 6970 /// Values for bit flags used to specify the mapping type for 6971 /// offloading. 6972 enum OpenMPOffloadMappingFlags : uint64_t { 6973 /// No flags 6974 OMP_MAP_NONE = 0x0, 6975 /// Allocate memory on the device and move data from host to device. 6976 OMP_MAP_TO = 0x01, 6977 /// Allocate memory on the device and move data from device to host. 6978 OMP_MAP_FROM = 0x02, 6979 /// Always perform the requested mapping action on the element, even 6980 /// if it was already mapped before. 6981 OMP_MAP_ALWAYS = 0x04, 6982 /// Delete the element from the device environment, ignoring the 6983 /// current reference count associated with the element. 6984 OMP_MAP_DELETE = 0x08, 6985 /// The element being mapped is a pointer-pointee pair; both the 6986 /// pointer and the pointee should be mapped. 6987 OMP_MAP_PTR_AND_OBJ = 0x10, 6988 /// This flags signals that the base address of an entry should be 6989 /// passed to the target kernel as an argument. 6990 OMP_MAP_TARGET_PARAM = 0x20, 6991 /// Signal that the runtime library has to return the device pointer 6992 /// in the current position for the data being mapped. Used when we have the 6993 /// use_device_ptr or use_device_addr clause. 6994 OMP_MAP_RETURN_PARAM = 0x40, 6995 /// This flag signals that the reference being passed is a pointer to 6996 /// private data. 
6997 OMP_MAP_PRIVATE = 0x80, 6998 /// Pass the element to the device by value. 6999 OMP_MAP_LITERAL = 0x100, 7000 /// Implicit map 7001 OMP_MAP_IMPLICIT = 0x200, 7002 /// Close is a hint to the runtime to allocate memory close to 7003 /// the target device. 7004 OMP_MAP_CLOSE = 0x400, 7005 /// 0x800 is reserved for compatibility with XLC. 7006 /// Produce a runtime error if the data is not already allocated. 7007 OMP_MAP_PRESENT = 0x1000, 7008 /// The 16 MSBs of the flags indicate whether the entry is member of some 7009 /// struct/class. 7010 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7011 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7012 }; 7013 7014 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7015 static unsigned getFlagMemberOffset() { 7016 unsigned Offset = 0; 7017 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7018 Remain = Remain >> 1) 7019 Offset++; 7020 return Offset; 7021 } 7022 7023 /// Class that associates information with a base pointer to be passed to the 7024 /// runtime library. 7025 class BasePointerInfo { 7026 /// The base pointer. 7027 llvm::Value *Ptr = nullptr; 7028 /// The base declaration that refers to this device pointer, or null if 7029 /// there is none. 
7030 const ValueDecl *DevPtrDecl = nullptr; 7031 7032 public: 7033 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7034 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7035 llvm::Value *operator*() const { return Ptr; } 7036 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7037 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7038 }; 7039 7040 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7041 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7042 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7043 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7044 7045 /// This structure contains combined information generated for mappable 7046 /// clauses, including base pointers, pointers, sizes, map types, and 7047 /// user-defined mappers. 7048 struct MapCombinedInfoTy { 7049 MapBaseValuesArrayTy BasePointers; 7050 MapValuesArrayTy Pointers; 7051 MapValuesArrayTy Sizes; 7052 MapFlagsArrayTy Types; 7053 MapMappersArrayTy Mappers; 7054 7055 /// Append arrays in \a CurInfo. 7056 void append(MapCombinedInfoTy &CurInfo) { 7057 BasePointers.append(CurInfo.BasePointers.begin(), 7058 CurInfo.BasePointers.end()); 7059 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7060 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7061 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7062 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7063 } 7064 }; 7065 7066 /// Map between a struct and the its lowest & highest elements which have been 7067 /// mapped. 
7068 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7069 /// HE(FieldIndex, Pointer)} 7070 struct StructRangeInfoTy { 7071 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7072 0, Address::invalid()}; 7073 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7074 0, Address::invalid()}; 7075 Address Base = Address::invalid(); 7076 }; 7077 7078 private: 7079 /// Kind that defines how a device pointer has to be returned. 7080 struct MapInfo { 7081 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7082 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7083 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7084 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7085 bool ReturnDevicePointer = false; 7086 bool IsImplicit = false; 7087 const ValueDecl *Mapper = nullptr; 7088 bool ForDeviceAddr = false; 7089 7090 MapInfo() = default; 7091 MapInfo( 7092 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7093 OpenMPMapClauseKind MapType, 7094 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7095 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7096 bool ReturnDevicePointer, bool IsImplicit, 7097 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false) 7098 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7099 MotionModifiers(MotionModifiers), 7100 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7101 Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {} 7102 }; 7103 7104 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7105 /// member and there is no map information about it, then emission of that 7106 /// entry is deferred until the whole struct has been processed. 
7107 struct DeferredDevicePtrEntryTy { 7108 const Expr *IE = nullptr; 7109 const ValueDecl *VD = nullptr; 7110 bool ForDeviceAddr = false; 7111 7112 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7113 bool ForDeviceAddr) 7114 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7115 }; 7116 7117 /// The target directive from where the mappable clauses were extracted. It 7118 /// is either a executable directive or a user-defined mapper directive. 7119 llvm::PointerUnion<const OMPExecutableDirective *, 7120 const OMPDeclareMapperDecl *> 7121 CurDir; 7122 7123 /// Function the directive is being generated for. 7124 CodeGenFunction &CGF; 7125 7126 /// Set of all first private variables in the current directive. 7127 /// bool data is set to true if the variable is implicitly marked as 7128 /// firstprivate, false otherwise. 7129 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7130 7131 /// Map between device pointer declarations and their expression components. 7132 /// The key value for declarations in 'this' is null. 7133 llvm::DenseMap< 7134 const ValueDecl *, 7135 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7136 DevPointersMap; 7137 7138 llvm::Value *getExprTypeSize(const Expr *E) const { 7139 QualType ExprTy = E->getType().getCanonicalType(); 7140 7141 // Calculate the size for array shaping expression. 7142 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7143 llvm::Value *Size = 7144 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7145 for (const Expr *SE : OAE->getDimensions()) { 7146 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7147 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7148 CGF.getContext().getSizeType(), 7149 SE->getExprLoc()); 7150 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7151 } 7152 return Size; 7153 } 7154 7155 // Reference types are ignored for mapping purposes. 
7156 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7157 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7158 7159 // Given that an array section is considered a built-in type, we need to 7160 // do the calculation based on the length of the section instead of relying 7161 // on CGF.getTypeSize(E->getType()). 7162 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7163 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7164 OAE->getBase()->IgnoreParenImpCasts()) 7165 .getCanonicalType(); 7166 7167 // If there is no length associated with the expression and lower bound is 7168 // not specified too, that means we are using the whole length of the 7169 // base. 7170 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7171 !OAE->getLowerBound()) 7172 return CGF.getTypeSize(BaseTy); 7173 7174 llvm::Value *ElemSize; 7175 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7176 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7177 } else { 7178 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7179 assert(ATy && "Expecting array type if not a pointer type."); 7180 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7181 } 7182 7183 // If we don't have a length at this point, that is because we have an 7184 // array section with a single element. 
7185 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7186 return ElemSize; 7187 7188 if (const Expr *LenExpr = OAE->getLength()) { 7189 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7190 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7191 CGF.getContext().getSizeType(), 7192 LenExpr->getExprLoc()); 7193 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7194 } 7195 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7196 OAE->getLowerBound() && "expected array_section[lb:]."); 7197 // Size = sizetype - lb * elemtype; 7198 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7199 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7200 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7201 CGF.getContext().getSizeType(), 7202 OAE->getLowerBound()->getExprLoc()); 7203 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7204 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7205 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7206 LengthVal = CGF.Builder.CreateSelect( 7207 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7208 return LengthVal; 7209 } 7210 return CGF.getTypeSize(ExprTy); 7211 } 7212 7213 /// Return the corresponding bits for a given map clause modifier. Add 7214 /// a flag marking the map as a pointer if requested. Add a flag marking the 7215 /// map as the first one of a series of maps that relate to the same map 7216 /// expression. 7217 OpenMPOffloadMappingFlags getMapTypeBits( 7218 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7219 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7220 bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7221 OpenMPOffloadMappingFlags Bits = 7222 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7223 switch (MapType) { 7224 case OMPC_MAP_alloc: 7225 case OMPC_MAP_release: 7226 // alloc and release is the default behavior in the runtime library, i.e. 7227 // if we don't pass any bits alloc/release that is what the runtime is 7228 // going to do. Therefore, we don't need to signal anything for these two 7229 // type modifiers. 7230 break; 7231 case OMPC_MAP_to: 7232 Bits |= OMP_MAP_TO; 7233 break; 7234 case OMPC_MAP_from: 7235 Bits |= OMP_MAP_FROM; 7236 break; 7237 case OMPC_MAP_tofrom: 7238 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7239 break; 7240 case OMPC_MAP_delete: 7241 Bits |= OMP_MAP_DELETE; 7242 break; 7243 case OMPC_MAP_unknown: 7244 llvm_unreachable("Unexpected map type!"); 7245 } 7246 if (AddPtrFlag) 7247 Bits |= OMP_MAP_PTR_AND_OBJ; 7248 if (AddIsTargetParamFlag) 7249 Bits |= OMP_MAP_TARGET_PARAM; 7250 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7251 != MapModifiers.end()) 7252 Bits |= OMP_MAP_ALWAYS; 7253 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7254 != MapModifiers.end()) 7255 Bits |= OMP_MAP_CLOSE; 7256 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7257 != MapModifiers.end()) 7258 Bits |= OMP_MAP_PRESENT; 7259 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7260 != MotionModifiers.end()) 7261 Bits |= OMP_MAP_PRESENT; 7262 return Bits; 7263 } 7264 7265 /// Return true if the provided expression is a final array section. A 7266 /// final array section, is one whose length can't be proved to be one. 7267 bool isFinalArraySectionExpression(const Expr *E) const { 7268 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7269 7270 // It is not an array section and therefore not a unity-size one. 7271 if (!OASE) 7272 return false; 7273 7274 // An array section with no colon always refer to a single element. 
7275 if (OASE->getColonLocFirst().isInvalid()) 7276 return false; 7277 7278 const Expr *Length = OASE->getLength(); 7279 7280 // If we don't have a length we have to check if the array has size 1 7281 // for this dimension. Also, we should always expect a length if the 7282 // base type is pointer. 7283 if (!Length) { 7284 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7285 OASE->getBase()->IgnoreParenImpCasts()) 7286 .getCanonicalType(); 7287 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7288 return ATy->getSize().getSExtValue() != 1; 7289 // If we don't have a constant dimension length, we have to consider 7290 // the current section as having any size, so it is not necessarily 7291 // unitary. If it happen to be unity size, that's user fault. 7292 return true; 7293 } 7294 7295 // Check if the length evaluates to 1. 7296 Expr::EvalResult Result; 7297 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7298 return true; // Can have more that size 1. 7299 7300 llvm::APSInt ConstLength = Result.Val.getInt(); 7301 return ConstLength.getSExtValue() != 1; 7302 } 7303 7304 /// Generate the base pointers, section pointers, sizes, map type bits, and 7305 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7306 /// map type, map or motion modifiers, and expression components. 7307 /// \a IsFirstComponent should be set to true if the provided set of 7308 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
    bool IsFirstComponentList, bool IsImplicit,
    const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
  // in unified shared memory mode or for local pointers
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  bool FirstPointerInComplexData = false;
  Address BP = Address::invalid();
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  // Compute the initial base pointer (BP) from the first (base) component.
  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    // Array subscript or array section applied directly to 'this'.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    // Array shaping applied directly to 'this'.
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      // No need to generate individual map information for the pointer, it
      // can be associated with the combined storage if shared memory mode is
      // active or the base declaration is not a global variable.
      const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
      if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
          !VD || VD->hasLocalStorage())
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
      else
        FirstPointerInComplexData = true;
      ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME) {
        ShouldBeMemberOf = true;
        // Do not emit as complex pointer if this is actually not array-like
        // expression.
        if (FirstPointerInComplexData) {
          QualType Ty = std::prev(I)
                            ->getAssociatedDeclaration()
                            ->getType()
                            .getNonReferenceType();
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
          FirstPointerInComplexData = false;
        }
      }
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // Note: OASE and OAShE below describe the current component and shadow
    // the variables of the same name computed for the base component above.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    bool IsNonDerefPointer = IsPointer && !UO && !BO;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the section pointer (lower bound address) of this component.
      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointerOrAddr =
          (IsPointer || ForDeviceAddr) && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty()) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(Next == CE &&
               "Expected last element for the overlapped elements.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last byte of the object (LB + TypeSize - 1).
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false);
        LB = BP;
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          // Use the first component that has an associated declaration: the
          // chunk to copy ends where that component begins.
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          // The next chunk starts right after the overlapped component.
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Emit the trailing chunk, from the last overlapped component up to
        // one past HB.
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.Types.push_back(Flags);
        CombinedInfo.Mappers.push_back(nullptr);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointerOrAddr) {
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

        // If Mapper is valid, the last component inherits the mapper.
        bool HasMapper = Mapper && Next == CE;
        CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags =
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           !IsExpressionFirstInfo || RequiresReference ||
                               FirstPointerInComplexData,
                           IsCaptureFirstInfo && !RequiresReference);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        CombinedInfo.Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress(CGF);
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
          PartialStruct.Base = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      FirstPointerInComplexData = false;
    }
  }
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
///
/// For a declaration not recorded in FirstPrivateDecls the result is
/// TO | FROM.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant variables captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointers: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    // Everything else: PRIVATE | TO.
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

/// Build the MEMBER_OF flag value for \p Position: (Position + 1) placed in
/// the bits starting at getFlagMemberOffset().
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift left by getFlagMemberOffset() bits.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the MEMBER_OF field of \p Flags with \p MemberOfFlag. PTR_AND_OBJ
/// entries whose MEMBER_OF field does not hold the placeholder value are left
/// untouched.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
      ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \p Layout the fields of \p RD in the order of their LLVM struct
/// field numbers, recursing into non-empty base classes at the position of
/// the base subobject. Bit-fields and zero-size fields are not appended.
///
/// \param RD record to flatten; must not be a union.
/// \param Layout output list the fields are appended to.
/// \param AsBase use the base-subobject LLVM type of \p RD instead of its
///        complete-object type to size the layout table.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; each slot holds either a base class,
  // a field, or stays null.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Keep whatever already occupies this slot.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
    // will fill in later.)
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Flatten: recurse into bases, append fields, skip empty slots.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

public:
/// Constructor for an executable directive. Records the declarations named
/// in firstprivate clauses, the implicit firstprivates coming from
/// uses_allocators clauses, and the component lists of is_device_ptr clauses.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the allocator traits variable when it is a plain reference;
      // otherwise fall back to the allocator variable itself.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
///
/// \param CombinedInfo receives the combined entry (base pointer, pointer,
///        size, type and a null mapper).
/// \param CurTypes flags of the member entries; TARGET_PARAM is cleared on
///        the first one and the MEMBER_OF fields are updated in place.
/// \param PartialStruct base address and lowest/highest mapped elements.
/// \param NotTargetParams when true the combined entry gets OMP_MAP_NONE
///        instead of OMP_MAP_TARGET_PARAM.
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                       MapFlagsArrayTy &CurTypes,
                       const StructRangeInfoTy &PartialStruct,
                       bool NotTargetParams = false) const {
  // Base is the base of the struct
  CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
  // Pointer is the address of the lowest element
  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
  CombinedInfo.Pointers.push_back(LB);
  // There should not be a mapper for a combined entry.
  CombinedInfo.Mappers.push_back(nullptr);
  // Size is (addr of {highest+1} element) - (addr of lowest element)
  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                /*isSigned=*/false);
  CombinedInfo.Sizes.push_back(Size);
  // Map type is always TARGET_PARAM when generating info for captures.
  CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                               : OMP_MAP_TARGET_PARAM);
  // If any element has the present modifier, then make sure the runtime
  // doesn't attempt to allocate the struct.
  if (CurTypes.end() !=
      llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
        return Type & OMP_MAP_PRESENT;
      }))
    CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
  // Remove TARGET_PARAM flag from the first element
  (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

  // All other current entries will be MEMBER_OF the combined entry
  // (except for PTR_AND_OBJ entries which do not have a placeholder value
  // 0xFFFF in the MEMBER_OF field).
  OpenMPOffloadMappingFlags MemberOfFlag =
      getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
  for (auto &M : CurTypes)
    setCorrectMemberOfFlag(M, MemberOfFlag);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates with a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
///
/// \param CombinedInfo Output arrays; results for every declaration are
/// appended to it.
/// \param NotTargetParams If true, generated entries use OMP_MAP_NONE where
/// they would otherwise be flagged OMP_MAP_TARGET_PARAM.
/// \param SkipVarSet Declarations for which no map information is collected.
void generateAllInfo(
    MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration (nullptr when no
  // declaration is given) and dropped when the key is in SkipVarSet.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          bool ForDeviceAddr = false) {
        const ValueDecl *VD =
            D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
        if (SkipVarSet.count(VD))
          return;
        Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                              ReturnDevicePointer, IsImplicit, Mapper,
                              ForDeviceAddr);
      };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Collect the component lists of 'map', 'to' and 'from' clauses. The
  // motion clauses are recorded as map types 'to'/'from' respectively.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
    }
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L));
    }
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L));
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  // Entries for use_device_ptr items without map information are collected
  // separately and appended to CombinedInfo at the very end.
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        // Exclude cases where the base pointer is mapped as array subscript,
        // array section or array shaping. The base address is passed as a
        // pointer to base in this case and cannot be used as a base for
        // use_device_ptr list item.
        if (CI != It->second.end()) {
          // PrevCI is the component that follows the base in the list (the
          // list is stored with the base last, hence the reverse iterator).
          auto PrevCI = std::next(CI->Components.rbegin());
          const auto *VarD = dyn_cast<VarDecl>(VD);
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              isa<MemberExpr>(IE) ||
              !VD->getType().getNonReferenceType()->isPointerType() ||
              PrevCI == CI->Components.rend() ||
              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
              VarD->hasLocalStorage()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // Non-member pointer: load its value and emit a zero-sized
        // RETURN_PARAM entry with the value as both base and pointer.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  // Processed ensures each declaration is handled at most once here.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Unlike use_device_ptr, the address of the item itself is used
        // (or its scalar value when the expression is not a glvalue), and
        // the entry goes directly into CombinedInfo.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Emit the entries declaration by declaration, in insertion order.
  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = !NotTargetParams;

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
      generateInfoForComponentList(L.MapType, L.MapModifiers,
                                   L.MotionModifiers, L.Components, CurInfo,
                                   PartialStruct, IsFirstComponentList,
                                   L.IsImplicit, L.Mapper, L.ForDeviceAddr);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
            RelevantVD);
        CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    // The combined entry is pushed to CombinedInfo before CurInfo is
    // appended, so it precedes its members in the final arrays.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                        NotTargetParams);

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted map clauses of user-defined mapper (all included
/// in \a CombinedInfo).
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Fill the information map for map clauses.
  // Every clause of the mapper is cast to OMPMapClause unconditionally.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto L : MC->component_lists()) {
      // Key by canonical declaration; nullptr when there is none.
      const ValueDecl *VD =
          std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                         : nullptr;
      // Get the corresponding user-defined mapper.
      Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                            MC->getMapTypeModifiers(), llvm::None,
                            /*ReturnDevicePointer=*/false, MC->isImplicit(),
                            std::get<2>(L));
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers,
                                   L.MotionModifiers, L.Components, CurInfo,
                                   PartialStruct, IsFirstComponentList,
                                   L.IsImplicit, L.Mapper, L.ForDeviceAddr);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
}

/// Emit capture info for lambdas for variables captured by reference.
///
/// Does nothing unless the type of \a VD (references stripped) is a lambda
/// class. Otherwise one entry is appended to \a CombinedInfo for a captured
/// 'this' and for every variable captured by reference or captured pointer.
/// \param Arg Address of the lambda object.
/// \param LambdaPointers Receives, for every emitted entry, a mapping from
/// the entry's base pointer (address of the capture field) to the address of
/// the lambda object.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Captured 'this': base is the field itself, size is that of a pointer.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    // By-copy captures are only of interest when they are pointers.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: pass the loaded pointer value with a zero
      // size.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose type is exactly the flag combination used for
/// implicit lambda captures, look up the lambda object recorded for its base
/// pointer in \a LambdaPointers, find the closest preceding entry whose
/// pointer is that lambda object and make the capture MEMBER_OF that entry.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Scan backwards from I-1 down to 0 for the parent lambda entry.
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
///
/// \param Cap The capture to process; must not capture a variable array type.
/// \param Arg Value passed for the capture; used directly as base and pointer
/// when the captured declaration appears in an is_device_ptr clause.
/// \param PartialStruct Forwarded to generateInfoForComponentList for each
/// component list of the capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // The canonical declaration this capture refers to; a capture of 'this' is
  // represented by a null declaration.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // One record per component list: (components, map type, map modifiers,
  // is-implicit, mapper).
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Gather every map-clause component list that refers to VD.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper);
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // Keys point into DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
    ++Count;
    // Compare L against every later list only, so each pair is visited once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied values are
      // overwritten but not read again in this loop nest.
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
      // Walk both lists from the base outwards while they agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The exhausted (shorter) list is the base; the other list is
        // recorded as overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // NOTE(review): VD is null for a capture of 'this', and RD below is
    // dereferenced without a null check; presumably overlapping lists only
    // occur for variables of record type -- confirm.
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common prefix (starting at the base) of both lists.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // First differing components: order by field index within the
          // same record, otherwise by first occurrence in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): It is dereferenced without comparing against
          // Layout.end(); assumes one of the two fields is always in Layout
          // -- confirm.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Emit the lists that have overlapped elements first. Each of them is
  // treated as a first component list, because the mapping flags depend on
  // it.
  // Go through all of the elements with the overlapped elements.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, OverlappedComponents);
  }
  // Go through other elements without overlapped elements.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
    // Lists already emitted above as overlap bases are skipped here.
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper);
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one entry is appended to \a CombinedInfo. It is always flagged
/// TARGET_PARAM, has no mapper, and is flagged IMPLICIT unless the captured
/// variable is recorded in FirstPrivateDecls with a false value.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // 'this' is mapped tofrom with the size of the pointee type.
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end() &&
        VD->getType().isConstant(CGF.getContext())) {
      // Constant firstprivate variable: map a registered global copy
      // instead of the original.
      llvm::Constant *Addr =
          CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
      // Copy the value of the original variable to the new global copy.
      CGF.Builder.CreateMemCpy(
          CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
          Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
          CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
      // Use new global variable as the base pointers.
      CombinedInfo.BasePointers.push_back(Addr);
      CombinedInfo.Pointers.push_back(Addr);
    } else {
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: the pointer entry is
        // the result of loading through the reference, not the reference
        // itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
8690 static void 8691 emitOffloadingArrays(CodeGenFunction &CGF, 8692 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8693 CGOpenMPRuntime::TargetDataInfo &Info) { 8694 CodeGenModule &CGM = CGF.CGM; 8695 ASTContext &Ctx = CGF.getContext(); 8696 8697 // Reset the array information. 8698 Info.clearArrayInfo(); 8699 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 8700 8701 if (Info.NumberOfPtrs) { 8702 // Detect if we have any capture size requiring runtime evaluation of the 8703 // size so that a constant array could be eventually used. 8704 bool hasRuntimeEvaluationCaptureSize = false; 8705 for (llvm::Value *S : CombinedInfo.Sizes) 8706 if (!isa<llvm::Constant>(S)) { 8707 hasRuntimeEvaluationCaptureSize = true; 8708 break; 8709 } 8710 8711 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8712 QualType PointerArrayType = Ctx.getConstantArrayType( 8713 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8714 /*IndexTypeQuals=*/0); 8715 8716 Info.BasePointersArray = 8717 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8718 Info.PointersArray = 8719 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8720 Address MappersArray = 8721 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 8722 Info.MappersArray = MappersArray.getPointer(); 8723 8724 // If we don't have any VLA types or other types that require runtime 8725 // evaluation, we can use a constant array for the map sizes, otherwise we 8726 // need to fill up the arrays as we do for the pointers. 
8727 QualType Int64Ty = 8728 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8729 if (hasRuntimeEvaluationCaptureSize) { 8730 QualType SizeArrayType = Ctx.getConstantArrayType( 8731 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8732 /*IndexTypeQuals=*/0); 8733 Info.SizesArray = 8734 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8735 } else { 8736 // We expect all the sizes to be constant, so we collect them to create 8737 // a constant array. 8738 SmallVector<llvm::Constant *, 16> ConstSizes; 8739 for (llvm::Value *S : CombinedInfo.Sizes) 8740 ConstSizes.push_back(cast<llvm::Constant>(S)); 8741 8742 auto *SizesArrayInit = llvm::ConstantArray::get( 8743 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8744 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8745 auto *SizesArrayGbl = new llvm::GlobalVariable( 8746 CGM.getModule(), SizesArrayInit->getType(), 8747 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8748 SizesArrayInit, Name); 8749 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8750 Info.SizesArray = SizesArrayGbl; 8751 } 8752 8753 // The map types are always constant so we don't need to generate code to 8754 // fill arrays. Instead, we create an array constant. 
8755 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 8756 llvm::copy(CombinedInfo.Types, Mapping.begin()); 8757 llvm::Constant *MapTypesArrayInit = 8758 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8759 std::string MaptypesName = 8760 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8761 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8762 CGM.getModule(), MapTypesArrayInit->getType(), 8763 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8764 MapTypesArrayInit, MaptypesName); 8765 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8766 Info.MapTypesArray = MapTypesArrayGbl; 8767 8768 // If there's a present map type modifier, it must not be applied to the end 8769 // of a region, so generate a separate map type array in that case. 8770 if (Info.separateBeginEndCalls()) { 8771 bool EndMapTypesDiffer = false; 8772 for (uint64_t &Type : Mapping) { 8773 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 8774 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 8775 EndMapTypesDiffer = true; 8776 } 8777 } 8778 if (EndMapTypesDiffer) { 8779 MapTypesArrayInit = 8780 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8781 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8782 MapTypesArrayGbl = new llvm::GlobalVariable( 8783 CGM.getModule(), MapTypesArrayInit->getType(), 8784 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8785 MapTypesArrayInit, MaptypesName); 8786 MapTypesArrayGbl->setUnnamedAddr( 8787 llvm::GlobalValue::UnnamedAddr::Global); 8788 Info.MapTypesArrayEnd = MapTypesArrayGbl; 8789 } 8790 } 8791 8792 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8793 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 8794 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8795 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8796 Info.BasePointersArray, 0, I); 8797 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8798 BP, 
BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8799 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8800 CGF.Builder.CreateStore(BPVal, BPAddr); 8801 8802 if (Info.requiresDevicePointerInfo()) 8803 if (const ValueDecl *DevVD = 8804 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 8805 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8806 8807 llvm::Value *PVal = CombinedInfo.Pointers[I]; 8808 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8809 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8810 Info.PointersArray, 0, I); 8811 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8812 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8813 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8814 CGF.Builder.CreateStore(PVal, PAddr); 8815 8816 if (hasRuntimeEvaluationCaptureSize) { 8817 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8818 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8819 Info.SizesArray, 8820 /*Idx0=*/0, 8821 /*Idx1=*/I); 8822 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8823 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 8824 CGM.Int64Ty, 8825 /*isSigned=*/true), 8826 SAddr); 8827 } 8828 8829 // Fill up the mapper array. 8830 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 8831 if (CombinedInfo.Mappers[I]) { 8832 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 8833 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 8834 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 8835 Info.HasMapper = true; 8836 } 8837 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 8838 CGF.Builder.CreateStore(MFunc, MAddr); 8839 } 8840 } 8841 } 8842 8843 /// Emit the arguments to be passed to the runtime library based on the 8844 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 8845 /// ForEndCall, emit map types to be passed for the end of the region instead of 8846 /// the beginning. 
8847 static void emitOffloadingArraysArgument( 8848 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8849 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8850 llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, 8851 CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) { 8852 assert((!ForEndCall || Info.separateBeginEndCalls()) && 8853 "expected region end call to runtime only when end call is separate"); 8854 CodeGenModule &CGM = CGF.CGM; 8855 if (Info.NumberOfPtrs) { 8856 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8857 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8858 Info.BasePointersArray, 8859 /*Idx0=*/0, /*Idx1=*/0); 8860 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8861 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8862 Info.PointersArray, 8863 /*Idx0=*/0, 8864 /*Idx1=*/0); 8865 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8866 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8867 /*Idx0=*/0, /*Idx1=*/0); 8868 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8869 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8870 ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 8871 : Info.MapTypesArray, 8872 /*Idx0=*/0, 8873 /*Idx1=*/0); 8874 MappersArrayArg = 8875 Info.HasMapper 8876 ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy) 8877 : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8878 } else { 8879 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8880 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8881 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8882 MapTypesArrayArg = 8883 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8884 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8885 } 8886 } 8887 8888 /// Check for inner distribute directive. 
8889 static const OMPExecutableDirective * 8890 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8891 const auto *CS = D.getInnermostCapturedStmt(); 8892 const auto *Body = 8893 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8894 const Stmt *ChildStmt = 8895 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8896 8897 if (const auto *NestedDir = 8898 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8899 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8900 switch (D.getDirectiveKind()) { 8901 case OMPD_target: 8902 if (isOpenMPDistributeDirective(DKind)) 8903 return NestedDir; 8904 if (DKind == OMPD_teams) { 8905 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8906 /*IgnoreCaptured=*/true); 8907 if (!Body) 8908 return nullptr; 8909 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8910 if (const auto *NND = 8911 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8912 DKind = NND->getDirectiveKind(); 8913 if (isOpenMPDistributeDirective(DKind)) 8914 return NND; 8915 } 8916 } 8917 return nullptr; 8918 case OMPD_target_teams: 8919 if (isOpenMPDistributeDirective(DKind)) 8920 return NestedDir; 8921 return nullptr; 8922 case OMPD_target_parallel: 8923 case OMPD_target_simd: 8924 case OMPD_target_parallel_for: 8925 case OMPD_target_parallel_for_simd: 8926 return nullptr; 8927 case OMPD_target_teams_distribute: 8928 case OMPD_target_teams_distribute_simd: 8929 case OMPD_target_teams_distribute_parallel_for: 8930 case OMPD_target_teams_distribute_parallel_for_simd: 8931 case OMPD_parallel: 8932 case OMPD_for: 8933 case OMPD_parallel_for: 8934 case OMPD_parallel_master: 8935 case OMPD_parallel_sections: 8936 case OMPD_for_simd: 8937 case OMPD_parallel_for_simd: 8938 case OMPD_cancel: 8939 case OMPD_cancellation_point: 8940 case OMPD_ordered: 8941 case OMPD_threadprivate: 8942 case OMPD_allocate: 8943 case OMPD_task: 8944 case OMPD_simd: 8945 case OMPD_sections: 8946 
case OMPD_section: 8947 case OMPD_single: 8948 case OMPD_master: 8949 case OMPD_critical: 8950 case OMPD_taskyield: 8951 case OMPD_barrier: 8952 case OMPD_taskwait: 8953 case OMPD_taskgroup: 8954 case OMPD_atomic: 8955 case OMPD_flush: 8956 case OMPD_depobj: 8957 case OMPD_scan: 8958 case OMPD_teams: 8959 case OMPD_target_data: 8960 case OMPD_target_exit_data: 8961 case OMPD_target_enter_data: 8962 case OMPD_distribute: 8963 case OMPD_distribute_simd: 8964 case OMPD_distribute_parallel_for: 8965 case OMPD_distribute_parallel_for_simd: 8966 case OMPD_teams_distribute: 8967 case OMPD_teams_distribute_simd: 8968 case OMPD_teams_distribute_parallel_for: 8969 case OMPD_teams_distribute_parallel_for_simd: 8970 case OMPD_target_update: 8971 case OMPD_declare_simd: 8972 case OMPD_declare_variant: 8973 case OMPD_begin_declare_variant: 8974 case OMPD_end_declare_variant: 8975 case OMPD_declare_target: 8976 case OMPD_end_declare_target: 8977 case OMPD_declare_reduction: 8978 case OMPD_declare_mapper: 8979 case OMPD_taskloop: 8980 case OMPD_taskloop_simd: 8981 case OMPD_master_taskloop: 8982 case OMPD_master_taskloop_simd: 8983 case OMPD_parallel_master_taskloop: 8984 case OMPD_parallel_master_taskloop_simd: 8985 case OMPD_requires: 8986 case OMPD_unknown: 8987 default: 8988 llvm_unreachable("Unexpected directive."); 8989 } 8990 } 8991 8992 return nullptr; 8993 } 8994 8995 /// Emit the user-defined mapper function. The code generation follows the 8996 /// pattern in the example below. 8997 /// \code 8998 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8999 /// void *base, void *begin, 9000 /// int64_t size, int64_t type) { 9001 /// // Allocate space for an array section first. 9002 /// if (size > 1 && !maptype.IsDelete) 9003 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9004 /// size*sizeof(Ty), clearToFrom(type)); 9005 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for. Nothing
/// is emitted if \p D already has an entry in UDMMap.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
/// under \c CGF->CurFn once the mapper has been emitted.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The five parameters are, in order: handle, base, begin, size and type.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block the loop is entered from; it supplies the initial
  // incoming value of the element pointer PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ends one loop iteration; it supplies the
  // back-edge incoming value of the element pointer PHI.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the position of the MEMBER_OF field so it
  // can be added to the map types of the components below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // Note: IsMember is true when the MEMBER_OF field is zero, in which case
    // the combine block is skipped.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possibilities; the edge from ToElseBB is the tofrom case
    // and carries the member map type unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the finished mapper so later requests for D reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Also remember D under the function it was emitted on behalf of.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
If \a IsInit is 9287 /// true, and \a MapType indicates to not delete this array, array 9288 /// initialization code is generated. If \a IsInit is false, and \a MapType 9289 /// indicates to not this array, array deletion code is generated. 9290 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9291 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9292 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9293 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9294 StringRef Prefix = IsInit ? ".init" : ".del"; 9295 9296 // Evaluate if this is an array section. 9297 llvm::BasicBlock *IsDeleteBB = 9298 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9299 llvm::BasicBlock *BodyBB = 9300 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9301 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9302 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9303 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9304 9305 // Evaluate if we are going to delete this section. 9306 MapperCGF.EmitBlock(IsDeleteBB); 9307 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9308 MapType, 9309 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9310 llvm::Value *DeleteCond; 9311 if (IsInit) { 9312 DeleteCond = MapperCGF.Builder.CreateIsNull( 9313 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9314 } else { 9315 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9316 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9317 } 9318 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9319 9320 MapperCGF.EmitBlock(BodyBB); 9321 // Get the array size by multiplying element size and element number (i.e., \p 9322 // Size). 
9323 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9324 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9325 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9326 // memory allocation/deletion purpose only. 9327 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9328 MapType, 9329 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9330 MappableExprsHandler::OMP_MAP_FROM))); 9331 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9332 // data structure. 9333 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9334 MapperCGF.EmitRuntimeCall( 9335 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9336 OMPRTL___tgt_push_mapper_component), 9337 OffloadingArgs); 9338 } 9339 9340 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9341 const OMPDeclareMapperDecl *D) { 9342 auto I = UDMMap.find(D); 9343 if (I != UDMMap.end()) 9344 return I->second; 9345 emitUserDefinedMapper(D); 9346 return UDMMap.lookup(D); 9347 } 9348 9349 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9350 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9351 llvm::Value *DeviceID, 9352 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9353 const OMPLoopDirective &D)> 9354 SizeEmitter) { 9355 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9356 const OMPExecutableDirective *TD = &D; 9357 // Get nested teams distribute kind directive, if any. 
9358 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9359 TD = getNestedDistributeDirective(CGM.getContext(), D); 9360 if (!TD) 9361 return; 9362 const auto *LD = cast<OMPLoopDirective>(TD); 9363 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9364 PrePostActionTy &) { 9365 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9366 llvm::Value *Args[] = {DeviceID, NumIterations}; 9367 CGF.EmitRuntimeCall( 9368 OMPBuilder.getOrCreateRuntimeFunction( 9369 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9370 Args); 9371 } 9372 }; 9373 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9374 } 9375 9376 void CGOpenMPRuntime::emitTargetCall( 9377 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9378 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9379 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9380 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9381 const OMPLoopDirective &D)> 9382 SizeEmitter) { 9383 if (!CGF.HaveInsertPoint()) 9384 return; 9385 9386 assert(OutlinedFn && "Invalid outlined function!"); 9387 9388 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9389 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9390 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9391 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9392 PrePostActionTy &) { 9393 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9394 }; 9395 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9396 9397 CodeGenFunction::OMPTargetDataInfo InputInfo; 9398 llvm::Value *MapTypesArray = nullptr; 9399 // Fill up the pointer arrays and transfer execution to the device. 
9400 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9401 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9402 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9403 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9404 // Reverse offloading is not supported, so just execute on the host. 9405 if (RequiresOuterTask) { 9406 CapturedVars.clear(); 9407 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9408 } 9409 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9410 return; 9411 } 9412 9413 // On top of the arrays that were filled up, the target offloading call 9414 // takes as arguments the device id as well as the host pointer. The host 9415 // pointer is used by the runtime library to identify the current target 9416 // region, so it only has to be unique and not necessarily point to 9417 // anything. It could be the pointer to the outlined function that 9418 // implements the target region, but we aren't using that so that the 9419 // compiler doesn't need to keep that, and could therefore inline the host 9420 // function if proven worthwhile during optimization. 9421 9422 // From this point on, we need to have an ID of the target region defined. 9423 assert(OutlinedFnID && "Invalid outlined function ID!"); 9424 9425 // Emit device ID if any. 9426 llvm::Value *DeviceID; 9427 if (Device.getPointer()) { 9428 assert((Device.getInt() == OMPC_DEVICE_unknown || 9429 Device.getInt() == OMPC_DEVICE_device_num) && 9430 "Expected device_num modifier."); 9431 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9432 DeviceID = 9433 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9434 } else { 9435 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9436 } 9437 9438 // Emit the number of elements in the offloading arrays. 9439 llvm::Value *PointerNum = 9440 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9441 9442 // Return value of the runtime offloading call. 
9443 llvm::Value *Return; 9444 9445 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9446 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9447 9448 // Emit tripcount for the target loop-based directive. 9449 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9450 9451 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9452 // The target region is an outlined function launched by the runtime 9453 // via calls __tgt_target() or __tgt_target_teams(). 9454 // 9455 // __tgt_target() launches a target region with one team and one thread, 9456 // executing a serial region. This master thread may in turn launch 9457 // more threads within its team upon encountering a parallel region, 9458 // however, no additional teams can be launched on the device. 9459 // 9460 // __tgt_target_teams() launches a target region with one or more teams, 9461 // each with one or more threads. This call is required for target 9462 // constructs such as: 9463 // 'target teams' 9464 // 'target' / 'teams' 9465 // 'target teams distribute parallel for' 9466 // 'target parallel' 9467 // and so on. 9468 // 9469 // Note that on the host and CPU targets, the runtime implementation of 9470 // these calls simply call the outlined function without forking threads. 9471 // The outlined functions themselves have runtime calls to 9472 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9473 // the compiler in emitTeamsCall() and emitParallelCall(). 9474 // 9475 // In contrast, on the NVPTX target, the implementation of 9476 // __tgt_target_teams() launches a GPU kernel with the requested number 9477 // of teams and threads so no additional calls to the runtime are required. 9478 if (NumTeams) { 9479 // If we have NumTeams defined this means that we have an enclosed teams 9480 // region. Therefore we also expect to have NumThreads defined. 
These two 9481 // values should be defined in the presence of a teams directive, 9482 // regardless of having any clauses associated. If the user is using teams 9483 // but no clauses, these two values will be the default that should be 9484 // passed to the runtime library - a 32-bit integer with the value zero. 9485 assert(NumThreads && "Thread limit expression should be available along " 9486 "with number of teams."); 9487 llvm::Value *OffloadingArgs[] = {DeviceID, 9488 OutlinedFnID, 9489 PointerNum, 9490 InputInfo.BasePointersArray.getPointer(), 9491 InputInfo.PointersArray.getPointer(), 9492 InputInfo.SizesArray.getPointer(), 9493 MapTypesArray, 9494 InputInfo.MappersArray.getPointer(), 9495 NumTeams, 9496 NumThreads}; 9497 Return = CGF.EmitRuntimeCall( 9498 OMPBuilder.getOrCreateRuntimeFunction( 9499 CGM.getModule(), HasNowait 9500 ? OMPRTL___tgt_target_teams_nowait_mapper 9501 : OMPRTL___tgt_target_teams_mapper), 9502 OffloadingArgs); 9503 } else { 9504 llvm::Value *OffloadingArgs[] = {DeviceID, 9505 OutlinedFnID, 9506 PointerNum, 9507 InputInfo.BasePointersArray.getPointer(), 9508 InputInfo.PointersArray.getPointer(), 9509 InputInfo.SizesArray.getPointer(), 9510 MapTypesArray, 9511 InputInfo.MappersArray.getPointer()}; 9512 Return = CGF.EmitRuntimeCall( 9513 OMPBuilder.getOrCreateRuntimeFunction( 9514 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 9515 : OMPRTL___tgt_target_mapper), 9516 OffloadingArgs); 9517 } 9518 9519 // Check the error code and execute the host version if required. 
9520 llvm::BasicBlock *OffloadFailedBlock = 9521 CGF.createBasicBlock("omp_offload.failed"); 9522 llvm::BasicBlock *OffloadContBlock = 9523 CGF.createBasicBlock("omp_offload.cont"); 9524 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9525 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9526 9527 CGF.EmitBlock(OffloadFailedBlock); 9528 if (RequiresOuterTask) { 9529 CapturedVars.clear(); 9530 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9531 } 9532 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9533 CGF.EmitBranch(OffloadContBlock); 9534 9535 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9536 }; 9537 9538 // Notify that the host version must be executed. 9539 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9540 RequiresOuterTask](CodeGenFunction &CGF, 9541 PrePostActionTy &) { 9542 if (RequiresOuterTask) { 9543 CapturedVars.clear(); 9544 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9545 } 9546 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9547 }; 9548 9549 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9550 &CapturedVars, RequiresOuterTask, 9551 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9552 // Fill up the arrays with all the captured variables. 9553 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9554 9555 // Get mappable expression information. 
9556 MappableExprsHandler MEHandler(D, CGF); 9557 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9558 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9559 9560 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9561 auto CV = CapturedVars.begin(); 9562 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9563 CE = CS.capture_end(); 9564 CI != CE; ++CI, ++RI, ++CV) { 9565 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9566 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9567 9568 // VLA sizes are passed to the outlined region by copy and do not have map 9569 // information associated. 9570 if (CI->capturesVariableArrayType()) { 9571 CurInfo.BasePointers.push_back(*CV); 9572 CurInfo.Pointers.push_back(*CV); 9573 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9574 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9575 // Copy to the device as an argument. No need to retrieve it. 9576 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9577 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9578 MappableExprsHandler::OMP_MAP_IMPLICIT); 9579 CurInfo.Mappers.push_back(nullptr); 9580 } else { 9581 // If we have any information in the map clause, we use it, otherwise we 9582 // just do a default mapping. 9583 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 9584 if (!CI->capturesThis()) 9585 MappedVarSet.insert(CI->getCapturedVar()); 9586 else 9587 MappedVarSet.insert(nullptr); 9588 if (CurInfo.BasePointers.empty()) 9589 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 9590 // Generate correct mapping for variables captured by reference in 9591 // lambdas. 9592 if (CI->capturesVariable()) 9593 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 9594 CurInfo, LambdaPointers); 9595 } 9596 // We expect to have at least an element of information for this capture. 
9597 assert(!CurInfo.BasePointers.empty() && 9598 "Non-existing map pointer for capture!"); 9599 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9600 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9601 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9602 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9603 "Inconsistent map information sizes!"); 9604 9605 // If there is an entry in PartialStruct it means we have a struct with 9606 // individual members mapped. Emit an extra combined entry. 9607 if (PartialStruct.Base.isValid()) 9608 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 9609 9610 // We need to append the results of this capture to what we already have. 9611 CombinedInfo.append(CurInfo); 9612 } 9613 // Adjust MEMBER_OF flags for the lambdas captures. 9614 MEHandler.adjustMemberOfForLambdaCaptures( 9615 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 9616 CombinedInfo.Types); 9617 // Map any list items in a map clause that were not captures because they 9618 // weren't referenced within the construct. 9619 MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true, 9620 MappedVarSet); 9621 9622 TargetDataInfo Info; 9623 // Fill up the arrays and create the arguments. 
9624 emitOffloadingArrays(CGF, CombinedInfo, Info); 9625 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9626 Info.PointersArray, Info.SizesArray, 9627 Info.MapTypesArray, Info.MappersArray, Info); 9628 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9629 InputInfo.BasePointersArray = 9630 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9631 InputInfo.PointersArray = 9632 Address(Info.PointersArray, CGM.getPointerAlign()); 9633 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9634 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 9635 MapTypesArray = Info.MapTypesArray; 9636 if (RequiresOuterTask) 9637 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9638 else 9639 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9640 }; 9641 9642 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9643 CodeGenFunction &CGF, PrePostActionTy &) { 9644 if (RequiresOuterTask) { 9645 CodeGenFunction::OMPTargetDataInfo InputInfo; 9646 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9647 } else { 9648 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9649 } 9650 }; 9651 9652 // If we have a target function ID it means that we need to support 9653 // offloading, otherwise, just execute on the host. We need to execute on host 9654 // regardless of the conditional in the if clause if, e.g., the user do not 9655 // specify target triples. 9656 if (OutlinedFnID) { 9657 if (IfCond) { 9658 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9659 } else { 9660 RegionCodeGenTy ThenRCG(TargetThenGen); 9661 ThenRCG(CGF); 9662 } 9663 } else { 9664 RegionCodeGenTy ElseRCG(TargetElseGen); 9665 ElseRCG(CGF); 9666 } 9667 } 9668 9669 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9670 StringRef ParentName) { 9671 if (!S) 9672 return; 9673 9674 // Codegen OMP target directives that offload compute to the device. 
9675 bool RequiresDeviceCodegen = 9676 isa<OMPExecutableDirective>(S) && 9677 isOpenMPTargetExecutionDirective( 9678 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9679 9680 if (RequiresDeviceCodegen) { 9681 const auto &E = *cast<OMPExecutableDirective>(S); 9682 unsigned DeviceID; 9683 unsigned FileID; 9684 unsigned Line; 9685 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9686 FileID, Line); 9687 9688 // Is this a target region that should not be emitted as an entry point? If 9689 // so just signal we are done with this target region. 9690 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9691 ParentName, Line)) 9692 return; 9693 9694 switch (E.getDirectiveKind()) { 9695 case OMPD_target: 9696 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9697 cast<OMPTargetDirective>(E)); 9698 break; 9699 case OMPD_target_parallel: 9700 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9701 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9702 break; 9703 case OMPD_target_teams: 9704 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9705 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9706 break; 9707 case OMPD_target_teams_distribute: 9708 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9709 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9710 break; 9711 case OMPD_target_teams_distribute_simd: 9712 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9713 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9714 break; 9715 case OMPD_target_parallel_for: 9716 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9717 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9718 break; 9719 case OMPD_target_parallel_for_simd: 9720 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9721 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9722 break; 9723 case OMPD_target_simd: 9724 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9725 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9726 break; 9727 case OMPD_target_teams_distribute_parallel_for: 9728 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9729 CGM, ParentName, 9730 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9731 break; 9732 case OMPD_target_teams_distribute_parallel_for_simd: 9733 CodeGenFunction:: 9734 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9735 CGM, ParentName, 9736 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9737 break; 9738 case OMPD_parallel: 9739 case OMPD_for: 9740 case OMPD_parallel_for: 9741 case OMPD_parallel_master: 9742 case OMPD_parallel_sections: 9743 case OMPD_for_simd: 9744 case OMPD_parallel_for_simd: 9745 case OMPD_cancel: 9746 case OMPD_cancellation_point: 9747 case OMPD_ordered: 9748 case OMPD_threadprivate: 9749 case OMPD_allocate: 9750 case OMPD_task: 9751 case OMPD_simd: 9752 case OMPD_sections: 9753 case OMPD_section: 9754 case OMPD_single: 9755 case OMPD_master: 9756 case OMPD_critical: 9757 case OMPD_taskyield: 9758 case OMPD_barrier: 9759 case OMPD_taskwait: 9760 case OMPD_taskgroup: 9761 case OMPD_atomic: 9762 case OMPD_flush: 9763 case OMPD_depobj: 9764 case OMPD_scan: 9765 case OMPD_teams: 9766 case OMPD_target_data: 9767 case OMPD_target_exit_data: 9768 case OMPD_target_enter_data: 9769 case OMPD_distribute: 9770 case OMPD_distribute_simd: 9771 case OMPD_distribute_parallel_for: 9772 case OMPD_distribute_parallel_for_simd: 9773 case OMPD_teams_distribute: 9774 case OMPD_teams_distribute_simd: 9775 case OMPD_teams_distribute_parallel_for: 9776 case OMPD_teams_distribute_parallel_for_simd: 9777 case OMPD_target_update: 9778 case OMPD_declare_simd: 9779 case OMPD_declare_variant: 9780 case OMPD_begin_declare_variant: 9781 case OMPD_end_declare_variant: 9782 case OMPD_declare_target: 9783 case OMPD_end_declare_target: 9784 case OMPD_declare_reduction: 9785 case OMPD_declare_mapper: 
9786 case OMPD_taskloop: 9787 case OMPD_taskloop_simd: 9788 case OMPD_master_taskloop: 9789 case OMPD_master_taskloop_simd: 9790 case OMPD_parallel_master_taskloop: 9791 case OMPD_parallel_master_taskloop_simd: 9792 case OMPD_requires: 9793 case OMPD_unknown: 9794 default: 9795 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9796 } 9797 return; 9798 } 9799 9800 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9801 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9802 return; 9803 9804 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 9805 return; 9806 } 9807 9808 // If this is a lambda function, look into its body. 9809 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9810 S = L->getBody(); 9811 9812 // Keep looking for target regions recursively. 9813 for (const Stmt *II : S->children()) 9814 scanForTargetRegionsFunctions(II, ParentName); 9815 } 9816 9817 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9818 // If emitting code for the host, we do not process FD here. Instead we do 9819 // the normal code generation. 9820 if (!CGM.getLangOpts().OpenMPIsDevice) { 9821 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9822 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9823 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9824 // Do not emit device_type(nohost) functions for the host. 9825 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9826 return true; 9827 } 9828 return false; 9829 } 9830 9831 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9832 // Try to detect target regions in the function. 9833 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9834 StringRef Name = CGM.getMangledName(GD); 9835 scanForTargetRegionsFunctions(FD->getBody(), Name); 9836 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9837 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9838 // Do not emit device_type(nohost) functions for the host. 
9839 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9840 return true; 9841 } 9842 9843 // Do not to emit function if it is not marked as declare target. 9844 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9845 AlreadyEmittedTargetDecls.count(VD) == 0; 9846 } 9847 9848 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9849 if (!CGM.getLangOpts().OpenMPIsDevice) 9850 return false; 9851 9852 // Check if there are Ctors/Dtors in this declaration and look for target 9853 // regions in it. We use the complete variant to produce the kernel name 9854 // mangling. 9855 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9856 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9857 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9858 StringRef ParentName = 9859 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9860 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9861 } 9862 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9863 StringRef ParentName = 9864 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9865 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9866 } 9867 } 9868 9869 // Do not to emit variable if it is not marked as declare target. 
9870 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9871 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9872 cast<VarDecl>(GD.getDecl())); 9873 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9874 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9875 HasRequiresUnifiedSharedMemory)) { 9876 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9877 return true; 9878 } 9879 return false; 9880 } 9881 9882 llvm::Constant * 9883 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9884 const VarDecl *VD) { 9885 assert(VD->getType().isConstant(CGM.getContext()) && 9886 "Expected constant variable."); 9887 StringRef VarName; 9888 llvm::Constant *Addr; 9889 llvm::GlobalValue::LinkageTypes Linkage; 9890 QualType Ty = VD->getType(); 9891 SmallString<128> Buffer; 9892 { 9893 unsigned DeviceID; 9894 unsigned FileID; 9895 unsigned Line; 9896 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9897 FileID, Line); 9898 llvm::raw_svector_ostream OS(Buffer); 9899 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9900 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9901 VarName = OS.str(); 9902 } 9903 Linkage = llvm::GlobalValue::InternalLinkage; 9904 Addr = 9905 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9906 getDefaultFirstprivateAddressSpace()); 9907 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9908 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9909 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9910 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9911 VarName, Addr, VarSize, 9912 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9913 return Addr; 9914 } 9915 9916 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9917 llvm::Constant *Addr) { 9918 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9919 !CGM.getLangOpts().OpenMPIsDevice) 9920 return; 9921 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9922 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9923 if (!Res) { 9924 if (CGM.getLangOpts().OpenMPIsDevice) { 9925 // Register non-target variables being emitted in device code (debug info 9926 // may cause this). 9927 StringRef VarName = CGM.getMangledName(VD); 9928 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9929 } 9930 return; 9931 } 9932 // Register declare target variables. 9933 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9934 StringRef VarName; 9935 CharUnits VarSize; 9936 llvm::GlobalValue::LinkageTypes Linkage; 9937 9938 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9939 !HasRequiresUnifiedSharedMemory) { 9940 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9941 VarName = CGM.getMangledName(VD); 9942 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9943 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9944 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9945 } else { 9946 VarSize = CharUnits::Zero(); 9947 } 9948 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9949 // Temp solution to prevent optimizations of the internal variables. 
9950 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9951 std::string RefName = getName({VarName, "ref"}); 9952 if (!CGM.GetGlobalValue(RefName)) { 9953 llvm::Constant *AddrRef = 9954 getOrCreateInternalVariable(Addr->getType(), RefName); 9955 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9956 GVAddrRef->setConstant(/*Val=*/true); 9957 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9958 GVAddrRef->setInitializer(Addr); 9959 CGM.addCompilerUsedGlobal(GVAddrRef); 9960 } 9961 } 9962 } else { 9963 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9964 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9965 HasRequiresUnifiedSharedMemory)) && 9966 "Declare target attribute must link or to with unified memory."); 9967 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9968 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9969 else 9970 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9971 9972 if (CGM.getLangOpts().OpenMPIsDevice) { 9973 VarName = Addr->getName(); 9974 Addr = nullptr; 9975 } else { 9976 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9977 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9978 } 9979 VarSize = CGM.getPointerSize(); 9980 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9981 } 9982 9983 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9984 VarName, Addr, VarSize, Flags, Linkage); 9985 } 9986 9987 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9988 if (isa<FunctionDecl>(GD.getDecl()) || 9989 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9990 return emitTargetFunctions(GD); 9991 9992 return emitTargetGlobalVariable(GD); 9993 } 9994 9995 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9996 for (const VarDecl *VD : DeferredGlobalVariables) { 9997 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9998 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9999 if (!Res) 10000 continue; 10001 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 10002 !HasRequiresUnifiedSharedMemory) { 10003 CGM.EmitGlobal(VD); 10004 } else { 10005 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10006 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10007 HasRequiresUnifiedSharedMemory)) && 10008 "Expected link clause or to clause with unified memory."); 10009 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10010 } 10011 } 10012 } 10013 10014 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10015 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10016 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10017 " Expected target-based directive."); 10018 } 10019 10020 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10021 for (const OMPClause *Clause : D->clauselists()) { 10022 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10023 HasRequiresUnifiedSharedMemory = true; 10024 } else if (const auto *AC = 10025 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10026 switch (AC->getAtomicDefaultMemOrderKind()) { 10027 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10028 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10029 break; 10030 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10031 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10032 break; 10033 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10034 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10035 break; 10036 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10037 break; 10038 } 10039 } 10040 } 10041 } 10042 10043 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10044 return RequiresAtomicOrdering; 10045 } 10046 10047 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10048 LangAS &AS) { 10049 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10050 return false; 10051 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10052 switch(A->getAllocatorType()) { 10053 case 
OMPAllocateDeclAttr::OMPNullMemAlloc: 10054 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10055 // Not supported, fallback to the default mem space. 10056 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10057 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10058 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10059 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10060 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10061 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10062 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10063 AS = LangAS::Default; 10064 return true; 10065 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10066 llvm_unreachable("Expected predefined allocator for the variables with the " 10067 "static storage."); 10068 } 10069 return false; 10070 } 10071 10072 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10073 return HasRequiresUnifiedSharedMemory; 10074 } 10075 10076 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10077 CodeGenModule &CGM) 10078 : CGM(CGM) { 10079 if (CGM.getLangOpts().OpenMPIsDevice) { 10080 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10081 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10082 } 10083 } 10084 10085 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10086 if (CGM.getLangOpts().OpenMPIsDevice) 10087 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10088 } 10089 10090 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10091 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10092 return true; 10093 10094 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10095 // Do not to emit function if it is marked as declare target as it was already 10096 // emitted. 
10097 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10098 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10099 if (auto *F = dyn_cast_or_null<llvm::Function>( 10100 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10101 return !F->isDeclaration(); 10102 return false; 10103 } 10104 return true; 10105 } 10106 10107 return !AlreadyEmittedTargetDecls.insert(D).second; 10108 } 10109 10110 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10111 // If we don't have entries or if we are emitting code for the device, we 10112 // don't need to do anything. 10113 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10114 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10115 (OffloadEntriesInfoManager.empty() && 10116 !HasEmittedDeclareTargetRegion && 10117 !HasEmittedTargetRegion)) 10118 return nullptr; 10119 10120 // Create and register the function that handles the requires directives. 10121 ASTContext &C = CGM.getContext(); 10122 10123 llvm::Function *RequiresRegFn; 10124 { 10125 CodeGenFunction CGF(CGM); 10126 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10127 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10128 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10129 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10130 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10131 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10132 // TODO: check for other requires clauses. 10133 // The requires directive takes effect only when a target region is 10134 // present in the compilation unit. Otherwise it is ignored and not 10135 // passed to the runtime. This avoids the runtime from throwing an error 10136 // for mismatching requires clauses across compilation units that don't 10137 // contain at least 1 target region. 
10138 assert((HasEmittedTargetRegion || 10139 HasEmittedDeclareTargetRegion || 10140 !OffloadEntriesInfoManager.empty()) && 10141 "Target or declare target region expected."); 10142 if (HasRequiresUnifiedSharedMemory) 10143 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10144 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10145 CGM.getModule(), OMPRTL___tgt_register_requires), 10146 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10147 CGF.FinishFunction(); 10148 } 10149 return RequiresRegFn; 10150 } 10151 10152 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10153 const OMPExecutableDirective &D, 10154 SourceLocation Loc, 10155 llvm::Function *OutlinedFn, 10156 ArrayRef<llvm::Value *> CapturedVars) { 10157 if (!CGF.HaveInsertPoint()) 10158 return; 10159 10160 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10161 CodeGenFunction::RunCleanupsScope Scope(CGF); 10162 10163 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10164 llvm::Value *Args[] = { 10165 RTLoc, 10166 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10167 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10168 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10169 RealArgs.append(std::begin(Args), std::end(Args)); 10170 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10171 10172 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10173 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10174 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10175 } 10176 10177 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10178 const Expr *NumTeams, 10179 const Expr *ThreadLimit, 10180 SourceLocation Loc) { 10181 if (!CGF.HaveInsertPoint()) 10182 return; 10183 10184 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10185 10186 llvm::Value *NumTeamsVal = 10187 NumTeams 10188 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10189 CGF.CGM.Int32Ty, /* isSigned = */ true) 10190 : CGF.Builder.getInt32(0); 10191 10192 llvm::Value *ThreadLimitVal = 10193 ThreadLimit 10194 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10195 CGF.CGM.Int32Ty, /* isSigned = */ true) 10196 : CGF.Builder.getInt32(0); 10197 10198 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10199 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10200 ThreadLimitVal}; 10201 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10202 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10203 PushNumTeamsArgs); 10204 } 10205 10206 void CGOpenMPRuntime::emitTargetDataCalls( 10207 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10208 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10209 if (!CGF.HaveInsertPoint()) 10210 return; 10211 10212 // Action used to replace the default codegen action and turn privatization 10213 // off. 10214 PrePostActionTy NoPrivAction; 10215 10216 // Generate the code for the opening of the data environment. Capture all the 10217 // arguments of the runtime call by reference because they are used in the 10218 // closing of the region. 10219 auto &&BeginThenGen = [this, &D, Device, &Info, 10220 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10221 // Fill up the arrays with all the mapped variables. 10222 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10223 10224 // Get map clause information. 10225 MappableExprsHandler MEHandler(D, CGF); 10226 MEHandler.generateAllInfo(CombinedInfo); 10227 10228 // Fill up the arrays and create the arguments. 
10229 emitOffloadingArrays(CGF, CombinedInfo, Info); 10230 10231 llvm::Value *BasePointersArrayArg = nullptr; 10232 llvm::Value *PointersArrayArg = nullptr; 10233 llvm::Value *SizesArrayArg = nullptr; 10234 llvm::Value *MapTypesArrayArg = nullptr; 10235 llvm::Value *MappersArrayArg = nullptr; 10236 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10237 SizesArrayArg, MapTypesArrayArg, 10238 MappersArrayArg, Info, /*ForEndCall=*/false); 10239 10240 // Emit device ID if any. 10241 llvm::Value *DeviceID = nullptr; 10242 if (Device) { 10243 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10244 CGF.Int64Ty, /*isSigned=*/true); 10245 } else { 10246 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10247 } 10248 10249 // Emit the number of elements in the offloading arrays. 10250 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10251 10252 llvm::Value *OffloadingArgs[] = { 10253 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10254 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10255 CGF.EmitRuntimeCall( 10256 OMPBuilder.getOrCreateRuntimeFunction( 10257 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10258 OffloadingArgs); 10259 10260 // If device pointer privatization is required, emit the body of the region 10261 // here. It will have to be duplicated: with and without privatization. 10262 if (!Info.CaptureDeviceAddrMap.empty()) 10263 CodeGen(CGF); 10264 }; 10265 10266 // Generate code for the closing of the data region. 
10267 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10268 PrePostActionTy &) { 10269 assert(Info.isValid() && "Invalid data environment closing arguments."); 10270 10271 llvm::Value *BasePointersArrayArg = nullptr; 10272 llvm::Value *PointersArrayArg = nullptr; 10273 llvm::Value *SizesArrayArg = nullptr; 10274 llvm::Value *MapTypesArrayArg = nullptr; 10275 llvm::Value *MappersArrayArg = nullptr; 10276 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10277 SizesArrayArg, MapTypesArrayArg, 10278 MappersArrayArg, Info, /*ForEndCall=*/true); 10279 10280 // Emit device ID if any. 10281 llvm::Value *DeviceID = nullptr; 10282 if (Device) { 10283 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10284 CGF.Int64Ty, /*isSigned=*/true); 10285 } else { 10286 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10287 } 10288 10289 // Emit the number of elements in the offloading arrays. 10290 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10291 10292 llvm::Value *OffloadingArgs[] = { 10293 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10294 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10295 CGF.EmitRuntimeCall( 10296 OMPBuilder.getOrCreateRuntimeFunction( 10297 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10298 OffloadingArgs); 10299 }; 10300 10301 // If we need device pointer privatization, we need to emit the body of the 10302 // region with no privatization in the 'else' branch of the conditional. 10303 // Otherwise, we don't have to do anything. 10304 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10305 PrePostActionTy &) { 10306 if (!Info.CaptureDeviceAddrMap.empty()) { 10307 CodeGen.setAction(NoPrivAction); 10308 CodeGen(CGF); 10309 } 10310 }; 10311 10312 // We don't have to do anything to close the region if the if clause evaluates 10313 // to false. 
10314 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10315 10316 if (IfCond) { 10317 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10318 } else { 10319 RegionCodeGenTy RCG(BeginThenGen); 10320 RCG(CGF); 10321 } 10322 10323 // If we don't require privatization of device pointers, we emit the body in 10324 // between the runtime calls. This avoids duplicating the body code. 10325 if (Info.CaptureDeviceAddrMap.empty()) { 10326 CodeGen.setAction(NoPrivAction); 10327 CodeGen(CGF); 10328 } 10329 10330 if (IfCond) { 10331 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10332 } else { 10333 RegionCodeGenTy RCG(EndThenGen); 10334 RCG(CGF); 10335 } 10336 } 10337 10338 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10339 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10340 const Expr *Device) { 10341 if (!CGF.HaveInsertPoint()) 10342 return; 10343 10344 assert((isa<OMPTargetEnterDataDirective>(D) || 10345 isa<OMPTargetExitDataDirective>(D) || 10346 isa<OMPTargetUpdateDirective>(D)) && 10347 "Expecting either target enter, exit data, or update directives."); 10348 10349 CodeGenFunction::OMPTargetDataInfo InputInfo; 10350 llvm::Value *MapTypesArray = nullptr; 10351 // Generate the code for the opening of the data environment. 10352 auto &&ThenGen = [this, &D, Device, &InputInfo, 10353 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10354 // Emit device ID if any. 10355 llvm::Value *DeviceID = nullptr; 10356 if (Device) { 10357 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10358 CGF.Int64Ty, /*isSigned=*/true); 10359 } else { 10360 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10361 } 10362 10363 // Emit the number of elements in the offloading arrays. 
10364 llvm::Constant *PointerNum = 10365 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10366 10367 llvm::Value *OffloadingArgs[] = {DeviceID, 10368 PointerNum, 10369 InputInfo.BasePointersArray.getPointer(), 10370 InputInfo.PointersArray.getPointer(), 10371 InputInfo.SizesArray.getPointer(), 10372 MapTypesArray, 10373 InputInfo.MappersArray.getPointer()}; 10374 10375 // Select the right runtime function call for each standalone 10376 // directive. 10377 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10378 RuntimeFunction RTLFn; 10379 switch (D.getDirectiveKind()) { 10380 case OMPD_target_enter_data: 10381 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10382 : OMPRTL___tgt_target_data_begin_mapper; 10383 break; 10384 case OMPD_target_exit_data: 10385 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10386 : OMPRTL___tgt_target_data_end_mapper; 10387 break; 10388 case OMPD_target_update: 10389 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10390 : OMPRTL___tgt_target_data_update_mapper; 10391 break; 10392 case OMPD_parallel: 10393 case OMPD_for: 10394 case OMPD_parallel_for: 10395 case OMPD_parallel_master: 10396 case OMPD_parallel_sections: 10397 case OMPD_for_simd: 10398 case OMPD_parallel_for_simd: 10399 case OMPD_cancel: 10400 case OMPD_cancellation_point: 10401 case OMPD_ordered: 10402 case OMPD_threadprivate: 10403 case OMPD_allocate: 10404 case OMPD_task: 10405 case OMPD_simd: 10406 case OMPD_sections: 10407 case OMPD_section: 10408 case OMPD_single: 10409 case OMPD_master: 10410 case OMPD_critical: 10411 case OMPD_taskyield: 10412 case OMPD_barrier: 10413 case OMPD_taskwait: 10414 case OMPD_taskgroup: 10415 case OMPD_atomic: 10416 case OMPD_flush: 10417 case OMPD_depobj: 10418 case OMPD_scan: 10419 case OMPD_teams: 10420 case OMPD_target_data: 10421 case OMPD_distribute: 10422 case OMPD_distribute_simd: 10423 case OMPD_distribute_parallel_for: 10424 case OMPD_distribute_parallel_for_simd: 
10425 case OMPD_teams_distribute: 10426 case OMPD_teams_distribute_simd: 10427 case OMPD_teams_distribute_parallel_for: 10428 case OMPD_teams_distribute_parallel_for_simd: 10429 case OMPD_declare_simd: 10430 case OMPD_declare_variant: 10431 case OMPD_begin_declare_variant: 10432 case OMPD_end_declare_variant: 10433 case OMPD_declare_target: 10434 case OMPD_end_declare_target: 10435 case OMPD_declare_reduction: 10436 case OMPD_declare_mapper: 10437 case OMPD_taskloop: 10438 case OMPD_taskloop_simd: 10439 case OMPD_master_taskloop: 10440 case OMPD_master_taskloop_simd: 10441 case OMPD_parallel_master_taskloop: 10442 case OMPD_parallel_master_taskloop_simd: 10443 case OMPD_target: 10444 case OMPD_target_simd: 10445 case OMPD_target_teams_distribute: 10446 case OMPD_target_teams_distribute_simd: 10447 case OMPD_target_teams_distribute_parallel_for: 10448 case OMPD_target_teams_distribute_parallel_for_simd: 10449 case OMPD_target_teams: 10450 case OMPD_target_parallel: 10451 case OMPD_target_parallel_for: 10452 case OMPD_target_parallel_for_simd: 10453 case OMPD_requires: 10454 case OMPD_unknown: 10455 default: 10456 llvm_unreachable("Unexpected standalone target data directive."); 10457 break; 10458 } 10459 CGF.EmitRuntimeCall( 10460 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10461 OffloadingArgs); 10462 }; 10463 10464 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10465 CodeGenFunction &CGF, PrePostActionTy &) { 10466 // Fill up the arrays with all the mapped variables. 10467 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10468 10469 // Get map clause information. 10470 MappableExprsHandler MEHandler(D, CGF); 10471 MEHandler.generateAllInfo(CombinedInfo); 10472 10473 TargetDataInfo Info; 10474 // Fill up the arrays and create the arguments. 
10475 emitOffloadingArrays(CGF, CombinedInfo, Info); 10476 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10477 Info.PointersArray, Info.SizesArray, 10478 Info.MapTypesArray, Info.MappersArray, Info); 10479 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10480 InputInfo.BasePointersArray = 10481 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10482 InputInfo.PointersArray = 10483 Address(Info.PointersArray, CGM.getPointerAlign()); 10484 InputInfo.SizesArray = 10485 Address(Info.SizesArray, CGM.getPointerAlign()); 10486 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10487 MapTypesArray = Info.MapTypesArray; 10488 if (D.hasClausesOfKind<OMPDependClause>()) 10489 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10490 else 10491 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10492 }; 10493 10494 if (IfCond) { 10495 emitIfClause(CGF, IfCond, TargetThenGen, 10496 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10497 } else { 10498 RegionCodeGenTy ThenRCG(TargetThenGen); 10499 ThenRCG(CGF); 10500 } 10501 } 10502 10503 namespace { 10504 /// Kind of parameter in a function with 'declare simd' directive. 10505 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10506 /// Attribute set of the parameter. 10507 struct ParamAttrTy { 10508 ParamKindTy Kind = Vector; 10509 llvm::APSInt StrideOrArg; 10510 llvm::APSInt Alignment; 10511 }; 10512 } // namespace 10513 10514 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10515 ArrayRef<ParamAttrTy> ParamAttrs) { 10516 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10517 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10518 // of that clause. The VLEN value must be power of 2. 10519 // In other case the notion of the function`s "characteristic data type" (CDT) 10520 // is used to compute the vector length. 
10521 // CDT is defined in the following order: 10522 // a) For non-void function, the CDT is the return type. 10523 // b) If the function has any non-uniform, non-linear parameters, then the 10524 // CDT is the type of the first such parameter. 10525 // c) If the CDT determined by a) or b) above is struct, union, or class 10526 // type which is pass-by-value (except for the type that maps to the 10527 // built-in complex data type), the characteristic data type is int. 10528 // d) If none of the above three cases is applicable, the CDT is int. 10529 // The VLEN is then determined based on the CDT and the size of vector 10530 // register of that ISA for which current vector version is generated. The 10531 // VLEN is computed using the formula below: 10532 // VLEN = sizeof(vector_register) / sizeof(CDT), 10533 // where vector register size specified in section 3.2.1 Registers and the 10534 // Stack Frame of original AMD64 ABI document. 10535 QualType RetType = FD->getReturnType(); 10536 if (RetType.isNull()) 10537 return 0; 10538 ASTContext &C = FD->getASTContext(); 10539 QualType CDT; 10540 if (!RetType.isNull() && !RetType->isVoidType()) { 10541 CDT = RetType; 10542 } else { 10543 unsigned Offset = 0; 10544 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10545 if (ParamAttrs[Offset].Kind == Vector) 10546 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10547 ++Offset; 10548 } 10549 if (CDT.isNull()) { 10550 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10551 if (ParamAttrs[I + Offset].Kind == Vector) { 10552 CDT = FD->getParamDecl(I)->getType(); 10553 break; 10554 } 10555 } 10556 } 10557 } 10558 if (CDT.isNull()) 10559 CDT = C.IntTy; 10560 CDT = CDT->getCanonicalTypeUnqualified(); 10561 if (CDT->isRecordType() || CDT->isUnionType()) 10562 CDT = C.IntTy; 10563 return C.getTypeSize(CDT); 10564 } 10565 10566 static void 10567 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10568 const llvm::APSInt &VLENVal, 10569 
ArrayRef<ParamAttrTy> ParamAttrs, 10570 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10571 struct ISADataTy { 10572 char ISA; 10573 unsigned VecRegSize; 10574 }; 10575 ISADataTy ISAData[] = { 10576 { 10577 'b', 128 10578 }, // SSE 10579 { 10580 'c', 256 10581 }, // AVX 10582 { 10583 'd', 256 10584 }, // AVX2 10585 { 10586 'e', 512 10587 }, // AVX512 10588 }; 10589 llvm::SmallVector<char, 2> Masked; 10590 switch (State) { 10591 case OMPDeclareSimdDeclAttr::BS_Undefined: 10592 Masked.push_back('N'); 10593 Masked.push_back('M'); 10594 break; 10595 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10596 Masked.push_back('N'); 10597 break; 10598 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10599 Masked.push_back('M'); 10600 break; 10601 } 10602 for (char Mask : Masked) { 10603 for (const ISADataTy &Data : ISAData) { 10604 SmallString<256> Buffer; 10605 llvm::raw_svector_ostream Out(Buffer); 10606 Out << "_ZGV" << Data.ISA << Mask; 10607 if (!VLENVal) { 10608 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10609 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10610 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10611 } else { 10612 Out << VLENVal; 10613 } 10614 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10615 switch (ParamAttr.Kind){ 10616 case LinearWithVarStride: 10617 Out << 's' << ParamAttr.StrideOrArg; 10618 break; 10619 case Linear: 10620 Out << 'l'; 10621 if (ParamAttr.StrideOrArg != 1) 10622 Out << ParamAttr.StrideOrArg; 10623 break; 10624 case Uniform: 10625 Out << 'u'; 10626 break; 10627 case Vector: 10628 Out << 'v'; 10629 break; 10630 } 10631 if (!!ParamAttr.Alignment) 10632 Out << 'a' << ParamAttr.Alignment; 10633 } 10634 Out << '_' << Fn->getName(); 10635 Fn->addFnAttr(Out.str()); 10636 } 10637 } 10638 } 10639 10640 // This are the Functions that are needed to mangle the name of the 10641 // vector functions generated by the compiler, according to the rules 10642 // defined in the "Vector Function ABI specifications for AArch64", 
10643 // available at 10644 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10645 10646 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10647 /// 10648 /// TODO: Need to implement the behavior for reference marked with a 10649 /// var or no linear modifiers (1.b in the section). For this, we 10650 /// need to extend ParamKindTy to support the linear modifiers. 10651 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10652 QT = QT.getCanonicalType(); 10653 10654 if (QT->isVoidType()) 10655 return false; 10656 10657 if (Kind == ParamKindTy::Uniform) 10658 return false; 10659 10660 if (Kind == ParamKindTy::Linear) 10661 return false; 10662 10663 // TODO: Handle linear references with modifiers 10664 10665 if (Kind == ParamKindTy::LinearWithVarStride) 10666 return false; 10667 10668 return true; 10669 } 10670 10671 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10672 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10673 QT = QT.getCanonicalType(); 10674 unsigned Size = C.getTypeSize(QT); 10675 10676 // Only scalars and complex within 16 bytes wide set PVB to true. 10677 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10678 return false; 10679 10680 if (QT->isFloatingType()) 10681 return true; 10682 10683 if (QT->isIntegerType()) 10684 return true; 10685 10686 if (QT->isPointerType()) 10687 return true; 10688 10689 // TODO: Add support for complex types (section 3.1.2, item 2). 10690 10691 return false; 10692 } 10693 10694 /// Computes the lane size (LS) of a return type or of an input parameter, 10695 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10696 /// TODO: Add support for references, section 3.2.1, item 1. 
10697 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10698 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10699 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10700 if (getAArch64PBV(PTy, C)) 10701 return C.getTypeSize(PTy); 10702 } 10703 if (getAArch64PBV(QT, C)) 10704 return C.getTypeSize(QT); 10705 10706 return C.getTypeSize(C.getUIntPtrType()); 10707 } 10708 10709 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10710 // signature of the scalar function, as defined in 3.2.2 of the 10711 // AAVFABI. 10712 static std::tuple<unsigned, unsigned, bool> 10713 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10714 QualType RetType = FD->getReturnType().getCanonicalType(); 10715 10716 ASTContext &C = FD->getASTContext(); 10717 10718 bool OutputBecomesInput = false; 10719 10720 llvm::SmallVector<unsigned, 8> Sizes; 10721 if (!RetType->isVoidType()) { 10722 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10723 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10724 OutputBecomesInput = true; 10725 } 10726 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10727 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10728 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10729 } 10730 10731 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10732 // The LS of a function parameter / return value can only be a power 10733 // of 2, starting from 8 bits, up to 128. 
10734 assert(std::all_of(Sizes.begin(), Sizes.end(), 10735 [](unsigned Size) { 10736 return Size == 8 || Size == 16 || Size == 32 || 10737 Size == 64 || Size == 128; 10738 }) && 10739 "Invalid size"); 10740 10741 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10742 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10743 OutputBecomesInput); 10744 } 10745 10746 /// Mangle the parameter part of the vector function name according to 10747 /// their OpenMP classification. The mangling function is defined in 10748 /// section 3.5 of the AAVFABI. 10749 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10750 SmallString<256> Buffer; 10751 llvm::raw_svector_ostream Out(Buffer); 10752 for (const auto &ParamAttr : ParamAttrs) { 10753 switch (ParamAttr.Kind) { 10754 case LinearWithVarStride: 10755 Out << "ls" << ParamAttr.StrideOrArg; 10756 break; 10757 case Linear: 10758 Out << 'l'; 10759 // Don't print the step value if it is not present or if it is 10760 // equal to 1. 10761 if (ParamAttr.StrideOrArg != 1) 10762 Out << ParamAttr.StrideOrArg; 10763 break; 10764 case Uniform: 10765 Out << 'u'; 10766 break; 10767 case Vector: 10768 Out << 'v'; 10769 break; 10770 } 10771 10772 if (!!ParamAttr.Alignment) 10773 Out << 'a' << ParamAttr.Alignment; 10774 } 10775 10776 return std::string(Out.str()); 10777 } 10778 10779 // Function used to add the attribute. The parameter `VLEN` is 10780 // templated to allow the use of "x" when targeting scalable functions 10781 // for SVE. 
10782 template <typename T> 10783 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10784 char ISA, StringRef ParSeq, 10785 StringRef MangledName, bool OutputBecomesInput, 10786 llvm::Function *Fn) { 10787 SmallString<256> Buffer; 10788 llvm::raw_svector_ostream Out(Buffer); 10789 Out << Prefix << ISA << LMask << VLEN; 10790 if (OutputBecomesInput) 10791 Out << "v"; 10792 Out << ParSeq << "_" << MangledName; 10793 Fn->addFnAttr(Out.str()); 10794 } 10795 10796 // Helper function to generate the Advanced SIMD names depending on 10797 // the value of the NDS when simdlen is not present. 10798 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10799 StringRef Prefix, char ISA, 10800 StringRef ParSeq, StringRef MangledName, 10801 bool OutputBecomesInput, 10802 llvm::Function *Fn) { 10803 switch (NDS) { 10804 case 8: 10805 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10806 OutputBecomesInput, Fn); 10807 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10808 OutputBecomesInput, Fn); 10809 break; 10810 case 16: 10811 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10812 OutputBecomesInput, Fn); 10813 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10814 OutputBecomesInput, Fn); 10815 break; 10816 case 32: 10817 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10818 OutputBecomesInput, Fn); 10819 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10820 OutputBecomesInput, Fn); 10821 break; 10822 case 64: 10823 case 128: 10824 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10825 OutputBecomesInput, Fn); 10826 break; 10827 default: 10828 llvm_unreachable("Scalar type is too wide."); 10829 } 10830 } 10831 10832 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10833 static void emitAArch64DeclareSimdFunction( 10834 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10835 ArrayRef<ParamAttrTy> ParamAttrs, 10836 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10837 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10838 10839 // Get basic data for building the vector signature. 10840 const auto Data = getNDSWDS(FD, ParamAttrs); 10841 const unsigned NDS = std::get<0>(Data); 10842 const unsigned WDS = std::get<1>(Data); 10843 const bool OutputBecomesInput = std::get<2>(Data); 10844 10845 // Check the values provided via `simdlen` by the user. 10846 // 1. A `simdlen(1)` doesn't produce vector signatures, 10847 if (UserVLEN == 1) { 10848 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10849 DiagnosticsEngine::Warning, 10850 "The clause simdlen(1) has no effect when targeting aarch64."); 10851 CGM.getDiags().Report(SLoc, DiagID); 10852 return; 10853 } 10854 10855 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10856 // Advanced SIMD output. 10857 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10858 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10859 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10860 "power of 2 when targeting Advanced SIMD."); 10861 CGM.getDiags().Report(SLoc, DiagID); 10862 return; 10863 } 10864 10865 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10866 // limits. 10867 if (ISA == 's' && UserVLEN != 0) { 10868 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10869 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10870 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10871 "lanes in the architectural constraints " 10872 "for SVE (min is 128-bit, max is " 10873 "2048-bit, by steps of 128-bit)"); 10874 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10875 return; 10876 } 10877 } 10878 10879 // Sort out parameter sequence. 
10880 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10881 StringRef Prefix = "_ZGV"; 10882 // Generate simdlen from user input (if any). 10883 if (UserVLEN) { 10884 if (ISA == 's') { 10885 // SVE generates only a masked function. 10886 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10887 OutputBecomesInput, Fn); 10888 } else { 10889 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10890 // Advanced SIMD generates one or two functions, depending on 10891 // the `[not]inbranch` clause. 10892 switch (State) { 10893 case OMPDeclareSimdDeclAttr::BS_Undefined: 10894 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10895 OutputBecomesInput, Fn); 10896 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10897 OutputBecomesInput, Fn); 10898 break; 10899 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10900 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10901 OutputBecomesInput, Fn); 10902 break; 10903 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10904 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10905 OutputBecomesInput, Fn); 10906 break; 10907 } 10908 } 10909 } else { 10910 // If no user simdlen is provided, follow the AAVFABI rules for 10911 // generating the vector length. 10912 if (ISA == 's') { 10913 // SVE, section 3.4.1, item 1. 10914 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10915 OutputBecomesInput, Fn); 10916 } else { 10917 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10918 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10919 // two vector names depending on the use of the clause 10920 // `[not]inbranch`. 
10921 switch (State) { 10922 case OMPDeclareSimdDeclAttr::BS_Undefined: 10923 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10924 OutputBecomesInput, Fn); 10925 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10926 OutputBecomesInput, Fn); 10927 break; 10928 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10929 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10930 OutputBecomesInput, Fn); 10931 break; 10932 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10933 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10934 OutputBecomesInput, Fn); 10935 break; 10936 } 10937 } 10938 } 10939 } 10940 10941 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10942 llvm::Function *Fn) { 10943 ASTContext &C = CGM.getContext(); 10944 FD = FD->getMostRecentDecl(); 10945 // Map params to their positions in function decl. 10946 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10947 if (isa<CXXMethodDecl>(FD)) 10948 ParamPositions.try_emplace(FD, 0); 10949 unsigned ParamPos = ParamPositions.size(); 10950 for (const ParmVarDecl *P : FD->parameters()) { 10951 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10952 ++ParamPos; 10953 } 10954 while (FD) { 10955 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10956 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10957 // Mark uniform parameters. 10958 for (const Expr *E : Attr->uniforms()) { 10959 E = E->IgnoreParenImpCasts(); 10960 unsigned Pos; 10961 if (isa<CXXThisExpr>(E)) { 10962 Pos = ParamPositions[FD]; 10963 } else { 10964 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10965 ->getCanonicalDecl(); 10966 Pos = ParamPositions[PVD]; 10967 } 10968 ParamAttrs[Pos].Kind = Uniform; 10969 } 10970 // Get alignment info. 
10971 auto NI = Attr->alignments_begin(); 10972 for (const Expr *E : Attr->aligneds()) { 10973 E = E->IgnoreParenImpCasts(); 10974 unsigned Pos; 10975 QualType ParmTy; 10976 if (isa<CXXThisExpr>(E)) { 10977 Pos = ParamPositions[FD]; 10978 ParmTy = E->getType(); 10979 } else { 10980 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10981 ->getCanonicalDecl(); 10982 Pos = ParamPositions[PVD]; 10983 ParmTy = PVD->getType(); 10984 } 10985 ParamAttrs[Pos].Alignment = 10986 (*NI) 10987 ? (*NI)->EvaluateKnownConstInt(C) 10988 : llvm::APSInt::getUnsigned( 10989 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10990 .getQuantity()); 10991 ++NI; 10992 } 10993 // Mark linear parameters. 10994 auto SI = Attr->steps_begin(); 10995 auto MI = Attr->modifiers_begin(); 10996 for (const Expr *E : Attr->linears()) { 10997 E = E->IgnoreParenImpCasts(); 10998 unsigned Pos; 10999 // Rescaling factor needed to compute the linear parameter 11000 // value in the mangled name. 11001 unsigned PtrRescalingFactor = 1; 11002 if (isa<CXXThisExpr>(E)) { 11003 Pos = ParamPositions[FD]; 11004 } else { 11005 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11006 ->getCanonicalDecl(); 11007 Pos = ParamPositions[PVD]; 11008 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11009 PtrRescalingFactor = CGM.getContext() 11010 .getTypeSizeInChars(P->getPointeeType()) 11011 .getQuantity(); 11012 } 11013 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11014 ParamAttr.Kind = Linear; 11015 // Assuming a stride of 1, for `linear` without modifiers. 
11016 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11017 if (*SI) { 11018 Expr::EvalResult Result; 11019 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11020 if (const auto *DRE = 11021 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11022 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11023 ParamAttr.Kind = LinearWithVarStride; 11024 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11025 ParamPositions[StridePVD->getCanonicalDecl()]); 11026 } 11027 } 11028 } else { 11029 ParamAttr.StrideOrArg = Result.Val.getInt(); 11030 } 11031 } 11032 // If we are using a linear clause on a pointer, we need to 11033 // rescale the value of linear_step with the byte size of the 11034 // pointee type. 11035 if (Linear == ParamAttr.Kind) 11036 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11037 ++SI; 11038 ++MI; 11039 } 11040 llvm::APSInt VLENVal; 11041 SourceLocation ExprLoc; 11042 const Expr *VLENExpr = Attr->getSimdlen(); 11043 if (VLENExpr) { 11044 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11045 ExprLoc = VLENExpr->getExprLoc(); 11046 } 11047 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11048 if (CGM.getTriple().isX86()) { 11049 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11050 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11051 unsigned VLEN = VLENVal.getExtValue(); 11052 StringRef MangledName = Fn->getName(); 11053 if (CGM.getTarget().hasFeature("sve")) 11054 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11055 MangledName, 's', 128, Fn, ExprLoc); 11056 if (CGM.getTarget().hasFeature("neon")) 11057 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11058 MangledName, 'n', 128, Fn, ExprLoc); 11059 } 11060 } 11061 FD = FD->getPreviousDecl(); 11062 } 11063 } 11064 11065 namespace { 11066 /// Cleanup action for doacross support. 
11067 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11068 public: 11069 static const int DoacrossFinArgs = 2; 11070 11071 private: 11072 llvm::FunctionCallee RTLFn; 11073 llvm::Value *Args[DoacrossFinArgs]; 11074 11075 public: 11076 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11077 ArrayRef<llvm::Value *> CallArgs) 11078 : RTLFn(RTLFn) { 11079 assert(CallArgs.size() == DoacrossFinArgs); 11080 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11081 } 11082 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11083 if (!CGF.HaveInsertPoint()) 11084 return; 11085 CGF.EmitRuntimeCall(RTLFn, Args); 11086 } 11087 }; 11088 } // namespace 11089 11090 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11091 const OMPLoopDirective &D, 11092 ArrayRef<Expr *> NumIterations) { 11093 if (!CGF.HaveInsertPoint()) 11094 return; 11095 11096 ASTContext &C = CGM.getContext(); 11097 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11098 RecordDecl *RD; 11099 if (KmpDimTy.isNull()) { 11100 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11101 // kmp_int64 lo; // lower 11102 // kmp_int64 up; // upper 11103 // kmp_int64 st; // stride 11104 // }; 11105 RD = C.buildImplicitRecord("kmp_dim"); 11106 RD->startDefinition(); 11107 addFieldToRecordDecl(C, RD, Int64Ty); 11108 addFieldToRecordDecl(C, RD, Int64Ty); 11109 addFieldToRecordDecl(C, RD, Int64Ty); 11110 RD->completeDefinition(); 11111 KmpDimTy = C.getRecordType(RD); 11112 } else { 11113 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11114 } 11115 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11116 QualType ArrayTy = 11117 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11118 11119 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11120 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11121 enum { LowerFD = 0, UpperFD, StrideFD }; 11122 // Fill dims with data. 
11123 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11124 LValue DimsLVal = CGF.MakeAddrLValue( 11125 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11126 // dims.upper = num_iterations; 11127 LValue UpperLVal = CGF.EmitLValueForField( 11128 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11129 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11130 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11131 Int64Ty, NumIterations[I]->getExprLoc()); 11132 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11133 // dims.stride = 1; 11134 LValue StrideLVal = CGF.EmitLValueForField( 11135 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11136 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11137 StrideLVal); 11138 } 11139 11140 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11141 // kmp_int32 num_dims, struct kmp_dim * dims); 11142 llvm::Value *Args[] = { 11143 emitUpdateLocation(CGF, D.getBeginLoc()), 11144 getThreadID(CGF, D.getBeginLoc()), 11145 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11146 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11147 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11148 CGM.VoidPtrTy)}; 11149 11150 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11151 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11152 CGF.EmitRuntimeCall(RTLFn, Args); 11153 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11154 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11155 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11156 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11157 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11158 llvm::makeArrayRef(FiniArgs)); 11159 } 11160 11161 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11162 const OMPDependClause *C) { 11163 QualType Int64Ty = 11164 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11165 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11166 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11167 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11168 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11169 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11170 const Expr *CounterVal = C->getLoopData(I); 11171 assert(CounterVal); 11172 llvm::Value *CntVal = CGF.EmitScalarConversion( 11173 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11174 CounterVal->getExprLoc()); 11175 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11176 /*Volatile=*/false, Int64Ty); 11177 } 11178 llvm::Value *Args[] = { 11179 emitUpdateLocation(CGF, C->getBeginLoc()), 11180 getThreadID(CGF, C->getBeginLoc()), 11181 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11182 llvm::FunctionCallee RTLFn; 11183 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11184 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11185 OMPRTL___kmpc_doacross_post); 11186 } else { 11187 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11188 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11189 OMPRTL___kmpc_doacross_wait); 11190 } 11191 CGF.EmitRuntimeCall(RTLFn, Args); 11192 } 11193 11194 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11195 llvm::FunctionCallee Callee, 11196 ArrayRef<llvm::Value *> Args) const { 11197 assert(Loc.isValid() && "Outlined function call location must be valid."); 11198 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11199 11200 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11201 if (Fn->doesNotThrow()) { 11202 CGF.EmitNounwindRuntimeCall(Fn, Args); 11203 return; 11204 } 11205 } 11206 CGF.EmitRuntimeCall(Callee, Args); 11207 } 11208 11209 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11210 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11211 ArrayRef<llvm::Value *> Args) const { 11212 emitCall(CGF, Loc, OutlinedFn, Args); 11213 } 11214 11215 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11216 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11217 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11218 HasEmittedDeclareTargetRegion = true; 11219 } 11220 11221 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11222 const VarDecl *NativeParam, 11223 const VarDecl *TargetParam) const { 11224 return CGF.GetAddrOfLocalVar(NativeParam); 11225 } 11226 11227 namespace { 11228 /// Cleanup action for allocate support. 11229 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11230 public: 11231 static const int CleanupArgs = 3; 11232 11233 private: 11234 llvm::FunctionCallee RTLFn; 11235 llvm::Value *Args[CleanupArgs]; 11236 11237 public: 11238 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11239 ArrayRef<llvm::Value *> CallArgs) 11240 : RTLFn(RTLFn) { 11241 assert(CallArgs.size() == CleanupArgs && 11242 "Size of arguments does not match."); 11243 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11244 } 11245 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11246 if (!CGF.HaveInsertPoint()) 11247 return; 11248 CGF.EmitRuntimeCall(RTLFn, Args); 11249 } 11250 }; 11251 } // namespace 11252 11253 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11254 const VarDecl *VD) { 11255 if (!VD) 11256 return Address::invalid(); 11257 const VarDecl *CVD = VD->getCanonicalDecl(); 11258 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11259 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11260 // Use the default allocation. 
11261 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11262 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11263 !AA->getAllocator()) 11264 return Address::invalid(); 11265 llvm::Value *Size; 11266 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11267 if (CVD->getType()->isVariablyModifiedType()) { 11268 Size = CGF.getTypeSize(CVD->getType()); 11269 // Align the size: ((size + align - 1) / align) * align 11270 Size = CGF.Builder.CreateNUWAdd( 11271 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11272 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11273 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11274 } else { 11275 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11276 Size = CGM.getSize(Sz.alignTo(Align)); 11277 } 11278 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11279 assert(AA->getAllocator() && 11280 "Expected allocator expression for non-default allocator."); 11281 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11282 // According to the standard, the original allocator type is a enum 11283 // (integer). Convert to pointer type, if required. 
11284 if (Allocator->getType()->isIntegerTy()) 11285 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11286 else if (Allocator->getType()->isPointerTy()) 11287 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11288 Allocator, CGM.VoidPtrTy); 11289 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11290 11291 llvm::Value *Addr = 11292 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11293 CGM.getModule(), OMPRTL___kmpc_alloc), 11294 Args, getName({CVD->getName(), ".void.addr"})); 11295 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11296 Allocator}; 11297 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11298 CGM.getModule(), OMPRTL___kmpc_free); 11299 11300 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11301 llvm::makeArrayRef(FiniArgs)); 11302 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11303 Addr, 11304 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11305 getName({CVD->getName(), ".addr"})); 11306 return Address(Addr, Align); 11307 } 11308 if (UntiedLocalVarsStack.empty()) 11309 return Address::invalid(); 11310 const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); 11311 auto It = UntiedData.find(VD); 11312 if (It == UntiedData.end()) 11313 return Address::invalid(); 11314 11315 return It->second; 11316 } 11317 11318 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11319 CodeGenModule &CGM, const OMPLoopDirective &S) 11320 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11321 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11322 if (!NeedToPush) 11323 return; 11324 NontemporalDeclsSet &DS = 11325 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11326 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11327 for (const Stmt *Ref : C->private_refs()) { 11328 const auto *SimpleRefExpr = 
cast<Expr>(Ref)->IgnoreParenImpCasts(); 11329 const ValueDecl *VD; 11330 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11331 VD = DRE->getDecl(); 11332 } else { 11333 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11334 assert((ME->isImplicitCXXThis() || 11335 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11336 "Expected member of current class."); 11337 VD = ME->getMemberDecl(); 11338 } 11339 DS.insert(VD); 11340 } 11341 } 11342 } 11343 11344 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11345 if (!NeedToPush) 11346 return; 11347 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11348 } 11349 11350 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 11351 CodeGenModule &CGM, 11352 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars) 11353 : CGM(CGM), NeedToPush(!LocalVars.empty()) { 11354 if (!NeedToPush) 11355 return; 11356 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 11357 } 11358 11359 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 11360 if (!NeedToPush) 11361 return; 11362 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 11363 } 11364 11365 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11366 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11367 11368 return llvm::any_of( 11369 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11370 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11371 } 11372 11373 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11374 const OMPExecutableDirective &S, 11375 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11376 const { 11377 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11378 // Vars in target/task regions must be excluded completely. 
11379 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11380 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11381 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11382 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11383 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11384 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11385 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11386 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11387 } 11388 } 11389 // Exclude vars in private clauses. 11390 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11391 for (const Expr *Ref : C->varlists()) { 11392 if (!Ref->getType()->isScalarType()) 11393 continue; 11394 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11395 if (!DRE) 11396 continue; 11397 NeedToCheckForLPCs.insert(DRE->getDecl()); 11398 } 11399 } 11400 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11401 for (const Expr *Ref : C->varlists()) { 11402 if (!Ref->getType()->isScalarType()) 11403 continue; 11404 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11405 if (!DRE) 11406 continue; 11407 NeedToCheckForLPCs.insert(DRE->getDecl()); 11408 } 11409 } 11410 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11411 for (const Expr *Ref : C->varlists()) { 11412 if (!Ref->getType()->isScalarType()) 11413 continue; 11414 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11415 if (!DRE) 11416 continue; 11417 NeedToCheckForLPCs.insert(DRE->getDecl()); 11418 } 11419 } 11420 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11421 for (const Expr *Ref : C->varlists()) { 11422 if (!Ref->getType()->isScalarType()) 11423 continue; 11424 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11425 if (!DRE) 11426 continue; 11427 NeedToCheckForLPCs.insert(DRE->getDecl()); 11428 } 11429 } 11430 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 11431 for (const Expr *Ref : C->varlists()) { 11432 if (!Ref->getType()->isScalarType()) 11433 continue; 11434 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11435 if (!DRE) 11436 continue; 11437 NeedToCheckForLPCs.insert(DRE->getDecl()); 11438 } 11439 } 11440 for (const Decl *VD : NeedToCheckForLPCs) { 11441 for (const LastprivateConditionalData &Data : 11442 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11443 if (Data.DeclToUniqueName.count(VD) > 0) { 11444 if (!Data.Disabled) 11445 NeedToAddForLPCsAsDisabled.insert(VD); 11446 break; 11447 } 11448 } 11449 } 11450 } 11451 11452 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11453 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11454 : CGM(CGF.CGM), 11455 Action((CGM.getLangOpts().OpenMP >= 50 && 11456 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11457 [](const OMPLastprivateClause *C) { 11458 return C->getKind() == 11459 OMPC_LASTPRIVATE_conditional; 11460 })) 11461 ? 
ActionToDo::PushAsLastprivateConditional 11462 : ActionToDo::DoNotPush) { 11463 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11464 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11465 return; 11466 assert(Action == ActionToDo::PushAsLastprivateConditional && 11467 "Expected a push action."); 11468 LastprivateConditionalData &Data = 11469 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11470 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11471 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11472 continue; 11473 11474 for (const Expr *Ref : C->varlists()) { 11475 Data.DeclToUniqueName.insert(std::make_pair( 11476 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11477 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11478 } 11479 } 11480 Data.IVLVal = IVLVal; 11481 Data.Fn = CGF.CurFn; 11482 } 11483 11484 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11485 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11486 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11487 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11488 if (CGM.getLangOpts().OpenMP < 50) 11489 return; 11490 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11491 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11492 if (!NeedToAddForLPCsAsDisabled.empty()) { 11493 Action = ActionToDo::DisableLastprivateConditional; 11494 LastprivateConditionalData &Data = 11495 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11496 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11497 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11498 Data.Fn = CGF.CurFn; 11499 Data.Disabled = true; 11500 } 11501 } 11502 11503 CGOpenMPRuntime::LastprivateConditionalRAII 11504 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11505 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11506 return 
LastprivateConditionalRAII(CGF, S); 11507 } 11508 11509 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11510 if (CGM.getLangOpts().OpenMP < 50) 11511 return; 11512 if (Action == ActionToDo::DisableLastprivateConditional) { 11513 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11514 "Expected list of disabled private vars."); 11515 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11516 } 11517 if (Action == ActionToDo::PushAsLastprivateConditional) { 11518 assert( 11519 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11520 "Expected list of lastprivate conditional vars."); 11521 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11522 } 11523 } 11524 11525 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11526 const VarDecl *VD) { 11527 ASTContext &C = CGM.getContext(); 11528 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11529 if (I == LastprivateConditionalToTypes.end()) 11530 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11531 QualType NewType; 11532 const FieldDecl *VDField; 11533 const FieldDecl *FiredField; 11534 LValue BaseLVal; 11535 auto VI = I->getSecond().find(VD); 11536 if (VI == I->getSecond().end()) { 11537 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11538 RD->startDefinition(); 11539 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11540 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 11541 RD->completeDefinition(); 11542 NewType = C.getRecordType(RD); 11543 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11544 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11545 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11546 } else { 11547 NewType = std::get<0>(VI->getSecond()); 11548 VDField = std::get<1>(VI->getSecond()); 11549 FiredField = std::get<2>(VI->getSecond()); 
11550 BaseLVal = std::get<3>(VI->getSecond()); 11551 } 11552 LValue FiredLVal = 11553 CGF.EmitLValueForField(BaseLVal, FiredField); 11554 CGF.EmitStoreOfScalar( 11555 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11556 FiredLVal); 11557 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11558 } 11559 11560 namespace { 11561 /// Checks if the lastprivate conditional variable is referenced in LHS. 11562 class LastprivateConditionalRefChecker final 11563 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11564 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11565 const Expr *FoundE = nullptr; 11566 const Decl *FoundD = nullptr; 11567 StringRef UniqueDeclName; 11568 LValue IVLVal; 11569 llvm::Function *FoundFn = nullptr; 11570 SourceLocation Loc; 11571 11572 public: 11573 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11574 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11575 llvm::reverse(LPM)) { 11576 auto It = D.DeclToUniqueName.find(E->getDecl()); 11577 if (It == D.DeclToUniqueName.end()) 11578 continue; 11579 if (D.Disabled) 11580 return false; 11581 FoundE = E; 11582 FoundD = E->getDecl()->getCanonicalDecl(); 11583 UniqueDeclName = It->second; 11584 IVLVal = D.IVLVal; 11585 FoundFn = D.Fn; 11586 break; 11587 } 11588 return FoundE == E; 11589 } 11590 bool VisitMemberExpr(const MemberExpr *E) { 11591 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11592 return false; 11593 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11594 llvm::reverse(LPM)) { 11595 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11596 if (It == D.DeclToUniqueName.end()) 11597 continue; 11598 if (D.Disabled) 11599 return false; 11600 FoundE = E; 11601 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11602 UniqueDeclName = It->second; 11603 IVLVal = D.IVLVal; 11604 FoundFn = D.Fn; 11605 break; 11606 } 11607 return FoundE == E; 11608 } 11609 bool VisitStmt(const Stmt *S) { 11610 for (const Stmt 
*Child : S->children()) { 11611 if (!Child) 11612 continue; 11613 if (const auto *E = dyn_cast<Expr>(Child)) 11614 if (!E->isGLValue()) 11615 continue; 11616 if (Visit(Child)) 11617 return true; 11618 } 11619 return false; 11620 } 11621 explicit LastprivateConditionalRefChecker( 11622 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11623 : LPM(LPM) {} 11624 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11625 getFoundData() const { 11626 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11627 } 11628 }; 11629 } // namespace 11630 11631 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11632 LValue IVLVal, 11633 StringRef UniqueDeclName, 11634 LValue LVal, 11635 SourceLocation Loc) { 11636 // Last updated loop counter for the lastprivate conditional var. 11637 // int<xx> last_iv = 0; 11638 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11639 llvm::Constant *LastIV = 11640 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11641 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11642 IVLVal.getAlignment().getAsAlign()); 11643 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11644 11645 // Last value of the lastprivate conditional. 11646 // decltype(priv_a) last_a; 11647 llvm::Constant *Last = getOrCreateInternalVariable( 11648 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11649 cast<llvm::GlobalVariable>(Last)->setAlignment( 11650 LVal.getAlignment().getAsAlign()); 11651 LValue LastLVal = 11652 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11653 11654 // Global loop counter. Required to handle inner parallel-for regions. 
11655 // iv 11656 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11657 11658 // #pragma omp critical(a) 11659 // if (last_iv <= iv) { 11660 // last_iv = iv; 11661 // last_a = priv_a; 11662 // } 11663 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11664 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11665 Action.Enter(CGF); 11666 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11667 // (last_iv <= iv) ? Check if the variable is updated and store new 11668 // value in global var. 11669 llvm::Value *CmpRes; 11670 if (IVLVal.getType()->isSignedIntegerType()) { 11671 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11672 } else { 11673 assert(IVLVal.getType()->isUnsignedIntegerType() && 11674 "Loop iteration variable must be integer."); 11675 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11676 } 11677 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11678 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11679 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11680 // { 11681 CGF.EmitBlock(ThenBB); 11682 11683 // last_iv = iv; 11684 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11685 11686 // last_a = priv_a; 11687 switch (CGF.getEvaluationKind(LVal.getType())) { 11688 case TEK_Scalar: { 11689 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11690 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11691 break; 11692 } 11693 case TEK_Complex: { 11694 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11695 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 11696 break; 11697 } 11698 case TEK_Aggregate: 11699 llvm_unreachable( 11700 "Aggregates are not supported in lastprivate conditional."); 11701 } 11702 // } 11703 CGF.EmitBranch(ExitBB); 11704 // There is no need to emit line number for unconditional branch. 
11705 (void)ApplyDebugLocation::CreateEmpty(CGF); 11706 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11707 }; 11708 11709 if (CGM.getLangOpts().OpenMPSimd) { 11710 // Do not emit as a critical region as no parallel region could be emitted. 11711 RegionCodeGenTy ThenRCG(CodeGen); 11712 ThenRCG(CGF); 11713 } else { 11714 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11715 } 11716 } 11717 11718 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11719 const Expr *LHS) { 11720 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11721 return; 11722 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11723 if (!Checker.Visit(LHS)) 11724 return; 11725 const Expr *FoundE; 11726 const Decl *FoundD; 11727 StringRef UniqueDeclName; 11728 LValue IVLVal; 11729 llvm::Function *FoundFn; 11730 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11731 Checker.getFoundData(); 11732 if (FoundFn != CGF.CurFn) { 11733 // Special codegen for inner parallel regions. 
11734 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11735 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11736 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11737 "Lastprivate conditional is not found in outer region."); 11738 QualType StructTy = std::get<0>(It->getSecond()); 11739 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11740 LValue PrivLVal = CGF.EmitLValue(FoundE); 11741 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11742 PrivLVal.getAddress(CGF), 11743 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11744 LValue BaseLVal = 11745 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11746 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11747 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11748 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11749 FiredLVal, llvm::AtomicOrdering::Unordered, 11750 /*IsVolatile=*/true, /*isInit=*/false); 11751 return; 11752 } 11753 11754 // Private address of the lastprivate conditional in the current context. 
11755 // priv_a 11756 LValue LVal = CGF.EmitLValue(FoundE); 11757 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11758 FoundE->getExprLoc()); 11759 } 11760 11761 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11762 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11763 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11764 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11765 return; 11766 auto Range = llvm::reverse(LastprivateConditionalStack); 11767 auto It = llvm::find_if( 11768 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11769 if (It == Range.end() || It->Fn != CGF.CurFn) 11770 return; 11771 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11772 assert(LPCI != LastprivateConditionalToTypes.end() && 11773 "Lastprivates must be registered already."); 11774 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11775 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11776 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11777 for (const auto &Pair : It->DeclToUniqueName) { 11778 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11779 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11780 continue; 11781 auto I = LPCI->getSecond().find(Pair.first); 11782 assert(I != LPCI->getSecond().end() && 11783 "Lastprivate must be rehistered already."); 11784 // bool Cmp = priv_a.Fired != 0; 11785 LValue BaseLVal = std::get<3>(I->getSecond()); 11786 LValue FiredLVal = 11787 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11788 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11789 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11790 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11791 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11792 // if (Cmp) { 11793 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11794 CGF.EmitBlock(ThenBB); 
11795 Address Addr = CGF.GetAddrOfLocalVar(VD); 11796 LValue LVal; 11797 if (VD->getType()->isReferenceType()) 11798 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11799 AlignmentSource::Decl); 11800 else 11801 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11802 AlignmentSource::Decl); 11803 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11804 D.getBeginLoc()); 11805 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11806 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11807 // } 11808 } 11809 } 11810 11811 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11812 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11813 SourceLocation Loc) { 11814 if (CGF.getLangOpts().OpenMP < 50) 11815 return; 11816 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11817 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11818 "Unknown lastprivate conditional variable."); 11819 StringRef UniqueName = It->second; 11820 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11821 // The variable was not updated in the region - exit. 
11822 if (!GV) 11823 return; 11824 LValue LPLVal = CGF.MakeAddrLValue( 11825 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11826 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11827 CGF.EmitStoreOfScalar(Res, PrivLVal); 11828 } 11829 11830 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11831 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11832 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11833 llvm_unreachable("Not supported in SIMD-only mode"); 11834 } 11835 11836 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11837 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11838 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11839 llvm_unreachable("Not supported in SIMD-only mode"); 11840 } 11841 11842 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11843 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11844 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11845 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11846 bool Tied, unsigned &NumberOfParts) { 11847 llvm_unreachable("Not supported in SIMD-only mode"); 11848 } 11849 11850 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11851 SourceLocation Loc, 11852 llvm::Function *OutlinedFn, 11853 ArrayRef<llvm::Value *> CapturedVars, 11854 const Expr *IfCond) { 11855 llvm_unreachable("Not supported in SIMD-only mode"); 11856 } 11857 11858 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11859 CodeGenFunction &CGF, StringRef CriticalName, 11860 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11861 const Expr *Hint) { 11862 llvm_unreachable("Not supported in SIMD-only mode"); 11863 } 11864 11865 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11866 const RegionCodeGenTy &MasterOpGen, 11867 SourceLocation Loc) { 11868 llvm_unreachable("Not supported in SIMD-only mode"); 11869 } 11870 11871 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11872 SourceLocation Loc) { 11873 llvm_unreachable("Not supported in SIMD-only mode"); 11874 } 11875 11876 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11877 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11878 SourceLocation Loc) { 11879 llvm_unreachable("Not supported in SIMD-only mode"); 11880 } 11881 11882 void CGOpenMPSIMDRuntime::emitSingleRegion( 11883 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11884 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11885 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11886 ArrayRef<const Expr *> AssignmentOps) { 11887 llvm_unreachable("Not supported in SIMD-only mode"); 11888 } 11889 11890 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11891 const RegionCodeGenTy &OrderedOpGen, 11892 SourceLocation Loc, 11893 bool IsThreads) { 11894 llvm_unreachable("Not supported in SIMD-only mode"); 11895 } 11896 11897 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11898 SourceLocation Loc, 11899 OpenMPDirectiveKind Kind, 11900 bool EmitChecks, 11901 bool ForceSimpleCall) { 11902 llvm_unreachable("Not supported in SIMD-only mode"); 11903 } 11904 11905 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11906 CodeGenFunction &CGF, SourceLocation Loc, 11907 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11908 bool Ordered, const DispatchRTInput &DispatchValues) { 11909 llvm_unreachable("Not supported in SIMD-only mode"); 11910 } 11911 11912 void CGOpenMPSIMDRuntime::emitForStaticInit( 11913 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11914 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11915 llvm_unreachable("Not supported in SIMD-only mode"); 11916 } 11917 11918 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11919 CodeGenFunction &CGF, SourceLocation Loc, 11920 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11921 llvm_unreachable("Not supported in SIMD-only mode"); 11922 } 11923 11924 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11925 SourceLocation Loc, 11926 unsigned IVSize, 11927 bool IVSigned) { 11928 llvm_unreachable("Not supported in SIMD-only mode"); 11929 } 11930 11931 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11932 SourceLocation Loc, 11933 OpenMPDirectiveKind DKind) { 11934 llvm_unreachable("Not supported in SIMD-only mode"); 11935 } 11936 11937 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11938 SourceLocation Loc, 11939 unsigned IVSize, bool IVSigned, 11940 Address IL, Address LB, 11941 Address UB, Address ST) { 11942 llvm_unreachable("Not supported in SIMD-only mode"); 11943 } 11944 11945 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11946 llvm::Value *NumThreads, 11947 SourceLocation Loc) { 11948 llvm_unreachable("Not supported in SIMD-only mode"); 11949 } 11950 11951 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11952 ProcBindKind ProcBind, 11953 SourceLocation Loc) { 11954 llvm_unreachable("Not supported in SIMD-only mode"); 11955 } 11956 11957 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11958 const VarDecl *VD, 11959 Address VDAddr, 11960 SourceLocation Loc) { 11961 llvm_unreachable("Not supported in SIMD-only mode"); 11962 } 11963 11964 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11965 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11966 CodeGenFunction *CGF) { 11967 llvm_unreachable("Not supported in SIMD-only mode"); 11968 } 11969 11970 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11971 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11972 llvm_unreachable("Not supported in SIMD-only mode"); 11973 } 11974 11975 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11976 ArrayRef<const Expr *> 
Vars, 11977 SourceLocation Loc, 11978 llvm::AtomicOrdering AO) { 11979 llvm_unreachable("Not supported in SIMD-only mode"); 11980 } 11981 11982 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11983 const OMPExecutableDirective &D, 11984 llvm::Function *TaskFunction, 11985 QualType SharedsTy, Address Shareds, 11986 const Expr *IfCond, 11987 const OMPTaskDataTy &Data) { 11988 llvm_unreachable("Not supported in SIMD-only mode"); 11989 } 11990 11991 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11992 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11993 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11994 const Expr *IfCond, const OMPTaskDataTy &Data) { 11995 llvm_unreachable("Not supported in SIMD-only mode"); 11996 } 11997 11998 void CGOpenMPSIMDRuntime::emitReduction( 11999 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12000 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12001 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12002 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12003 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12004 ReductionOps, Options); 12005 } 12006 12007 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12008 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12009 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12010 llvm_unreachable("Not supported in SIMD-only mode"); 12011 } 12012 12013 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12014 SourceLocation Loc, 12015 bool IsWorksharingReduction) { 12016 llvm_unreachable("Not supported in SIMD-only mode"); 12017 } 12018 12019 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12020 SourceLocation Loc, 12021 ReductionCodeGen &RCG, 12022 unsigned N) { 12023 llvm_unreachable("Not supported in SIMD-only mode"); 12024 } 12025 
12026 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12027 SourceLocation Loc, 12028 llvm::Value *ReductionsPtr, 12029 LValue SharedLVal) { 12030 llvm_unreachable("Not supported in SIMD-only mode"); 12031 } 12032 12033 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12034 SourceLocation Loc) { 12035 llvm_unreachable("Not supported in SIMD-only mode"); 12036 } 12037 12038 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12039 CodeGenFunction &CGF, SourceLocation Loc, 12040 OpenMPDirectiveKind CancelRegion) { 12041 llvm_unreachable("Not supported in SIMD-only mode"); 12042 } 12043 12044 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12045 SourceLocation Loc, const Expr *IfCond, 12046 OpenMPDirectiveKind CancelRegion) { 12047 llvm_unreachable("Not supported in SIMD-only mode"); 12048 } 12049 12050 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12051 const OMPExecutableDirective &D, StringRef ParentName, 12052 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12053 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12054 llvm_unreachable("Not supported in SIMD-only mode"); 12055 } 12056 12057 void CGOpenMPSIMDRuntime::emitTargetCall( 12058 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12059 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12060 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12061 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12062 const OMPLoopDirective &D)> 12063 SizeEmitter) { 12064 llvm_unreachable("Not supported in SIMD-only mode"); 12065 } 12066 12067 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12068 llvm_unreachable("Not supported in SIMD-only mode"); 12069 } 12070 12071 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12072 llvm_unreachable("Not supported in SIMD-only mode"); 12073 } 12074 12075 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12076 return false; 12077 } 12078 12079 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12080 const OMPExecutableDirective &D, 12081 SourceLocation Loc, 12082 llvm::Function *OutlinedFn, 12083 ArrayRef<llvm::Value *> CapturedVars) { 12084 llvm_unreachable("Not supported in SIMD-only mode"); 12085 } 12086 12087 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12088 const Expr *NumTeams, 12089 const Expr *ThreadLimit, 12090 SourceLocation Loc) { 12091 llvm_unreachable("Not supported in SIMD-only mode"); 12092 } 12093 12094 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12095 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12096 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12097 llvm_unreachable("Not supported in SIMD-only mode"); 12098 } 12099 12100 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12101 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12102 const Expr *Device) { 12103 llvm_unreachable("Not supported in SIMD-only mode"); 12104 } 12105 12106 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12107 const OMPLoopDirective &D, 12108 ArrayRef<Expr *> NumIterations) { 12109 llvm_unreachable("Not supported in SIMD-only mode"); 12110 } 12111 12112 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12113 const OMPDependClause *C) { 12114 llvm_unreachable("Not supported in SIMD-only mode"); 12115 } 12116 12117 const VarDecl * 12118 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12119 const VarDecl *NativeParam) const { 12120 llvm_unreachable("Not supported in SIMD-only mode"); 12121 } 12122 12123 Address 12124 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12125 const VarDecl *NativeParam, 12126 const VarDecl *TargetParam) const { 12127 llvm_unreachable("Not supported in SIMD-only mode"); 12128 } 12129