1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 
159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel) 421 : CGF(CGF) { 422 // Start emission for the construct. 
423 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 424 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 427 CGF.LambdaThisCaptureField = nullptr; 428 BlockInfo = CGF.BlockInfo; 429 CGF.BlockInfo = nullptr; 430 } 431 432 ~InlinedOpenMPRegionRAII() { 433 // Restore original CapturedStmtInfo only if we're done with code emission. 434 auto *OldCSI = 435 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 436 delete CGF.CapturedStmtInfo; 437 CGF.CapturedStmtInfo = OldCSI; 438 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 439 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 440 CGF.BlockInfo = BlockInfo; 441 } 442 }; 443 444 /// Values for bit flags used in the ident_t to describe the fields. 445 /// All enumeric elements are named and described in accordance with the code 446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 447 enum OpenMPLocationFlags : unsigned { 448 /// Use trampoline for internal microtask. 449 OMP_IDENT_IMD = 0x01, 450 /// Use c-style ident structure. 451 OMP_IDENT_KMPC = 0x02, 452 /// Atomic reduction option for kmpc_reduce. 453 OMP_ATOMIC_REDUCE = 0x10, 454 /// Explicit 'barrier' directive. 455 OMP_IDENT_BARRIER_EXPL = 0x20, 456 /// Implicit barrier in code. 457 OMP_IDENT_BARRIER_IMPL = 0x40, 458 /// Implicit barrier in 'for' directive. 459 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 460 /// Implicit barrier in 'sections' directive. 461 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 462 /// Implicit barrier in 'single' directive. 463 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 464 /// Call of __kmp_for_static_init for static loop. 465 OMP_IDENT_WORK_LOOP = 0x200, 466 /// Call of __kmp_for_static_init for sections. 467 OMP_IDENT_WORK_SECTIONS = 0x400, 468 /// Call of __kmp_for_static_init for distribute. 469 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 470 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 471 }; 472 473 namespace { 474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 475 /// Values for bit flags for marking which requires clauses have been used. 476 enum OpenMPOffloadingRequiresDirFlags : int64_t { 477 /// flag undefined. 478 OMP_REQ_UNDEFINED = 0x000, 479 /// no requires clause present. 480 OMP_REQ_NONE = 0x001, 481 /// reverse_offload clause. 482 OMP_REQ_REVERSE_OFFLOAD = 0x002, 483 /// unified_address clause. 484 OMP_REQ_UNIFIED_ADDRESS = 0x004, 485 /// unified_shared_memory clause. 486 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 487 /// dynamic_allocators clause. 488 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 489 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 490 }; 491 492 enum OpenMPOffloadingReservedDeviceIDs { 493 /// Device ID if the device was not defined, runtime should get it 494 /// from environment variables in the spec. 495 OMP_DEVICEID_UNDEF = -1, 496 }; 497 } // anonymous namespace 498 499 /// Describes ident structure that describes a source location. 
500 /// All descriptions are taken from 501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 502 /// Original structure: 503 /// typedef struct ident { 504 /// kmp_int32 reserved_1; /**< might be used in Fortran; 505 /// see above */ 506 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 507 /// KMP_IDENT_KMPC identifies this union 508 /// member */ 509 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 510 /// see above */ 511 ///#if USE_ITT_BUILD 512 /// /* but currently used for storing 513 /// region-specific ITT */ 514 /// /* contextual information. */ 515 ///#endif /* USE_ITT_BUILD */ 516 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 517 /// C++ */ 518 /// char const *psource; /**< String describing the source location. 519 /// The string is composed of semi-colon separated 520 // fields which describe the source file, 521 /// the function and a pair of line numbers that 522 /// delimit the construct. 523 /// */ 524 /// } ident_t; 525 enum IdentFieldIndex { 526 /// might be used in Fortran 527 IdentField_Reserved_1, 528 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 529 IdentField_Flags, 530 /// Not really used in Fortran any more 531 IdentField_Reserved_2, 532 /// Source[4] in Fortran, do not use for C++ 533 IdentField_Reserved_3, 534 /// String describing the source location. The string is composed of 535 /// semi-colon separated fields which describe the source file, the function 536 /// and a pair of line numbers that delimit the construct. 537 IdentField_PSource 538 }; 539 540 /// Schedule types for 'omp for' loops (these enumerators are taken from 541 /// the enum sched_type in kmp.h). 542 enum OpenMPSchedType { 543 /// Lower bound for default (unordered) versions. 544 OMP_sch_lower = 32, 545 OMP_sch_static_chunked = 33, 546 OMP_sch_static = 34, 547 OMP_sch_dynamic_chunked = 35, 548 OMP_sch_guided_chunked = 36, 549 OMP_sch_runtime = 37, 550 OMP_sch_auto = 38, 551 /// static with chunk adjustment (e.g., simd) 552 OMP_sch_static_balanced_chunked = 45, 553 /// Lower bound for 'ordered' versions. 554 OMP_ord_lower = 64, 555 OMP_ord_static_chunked = 65, 556 OMP_ord_static = 66, 557 OMP_ord_dynamic_chunked = 67, 558 OMP_ord_guided_chunked = 68, 559 OMP_ord_runtime = 69, 560 OMP_ord_auto = 70, 561 OMP_sch_default = OMP_sch_static, 562 /// dist_schedule types 563 OMP_dist_sch_static_chunked = 91, 564 OMP_dist_sch_static = 92, 565 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 566 /// Set if the monotonic schedule modifier was present. 567 OMP_sch_modifier_monotonic = (1 << 29), 568 /// Set if the nonmonotonic schedule modifier was present. 569 OMP_sch_modifier_nonmonotonic = (1 << 30), 570 }; 571 572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 573 /// region. 
574 class CleanupTy final : public EHScopeStack::Cleanup { 575 PrePostActionTy *Action; 576 577 public: 578 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 579 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 580 if (!CGF.HaveInsertPoint()) 581 return; 582 Action->Exit(CGF); 583 } 584 }; 585 586 } // anonymous namespace 587 588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 589 CodeGenFunction::RunCleanupsScope Scope(CGF); 590 if (PrePostAction) { 591 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 592 Callback(CodeGen, CGF, *PrePostAction); 593 } else { 594 PrePostActionTy Action; 595 Callback(CodeGen, CGF, Action); 596 } 597 } 598 599 /// Check if the combiner is a call to UDR combiner and if it is so return the 600 /// UDR decl used for reduction. 601 static const OMPDeclareReductionDecl * 602 getReductionInit(const Expr *ReductionOp) { 603 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 604 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 605 if (const auto *DRE = 606 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 607 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 608 return DRD; 609 return nullptr; 610 } 611 612 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 613 const OMPDeclareReductionDecl *DRD, 614 const Expr *InitOp, 615 Address Private, Address Original, 616 QualType Ty) { 617 if (DRD->getInitializer()) { 618 std::pair<llvm::Function *, llvm::Function *> Reduction = 619 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 620 const auto *CE = cast<CallExpr>(InitOp); 621 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 622 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 623 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 624 const auto *LHSDRE = 625 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 626 const auto *RHSDRE = 627 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 628 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 629 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 630 [=]() { return Private; }); 631 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 632 [=]() { return Original; }); 633 (void)PrivateScope.Privatize(); 634 RValue Func = RValue::get(Reduction.second); 635 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 636 CGF.EmitIgnoredExpr(InitOp); 637 } else { 638 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 639 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 640 auto *GV = new llvm::GlobalVariable( 641 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 642 llvm::GlobalValue::PrivateLinkage, Init, Name); 643 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 644 RValue InitRVal; 645 switch (CGF.getEvaluationKind(Ty)) { 646 case TEK_Scalar: 647 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 648 break; 649 case TEK_Complex: 650 InitRVal = 651 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 652 break; 653 case TEK_Aggregate: 654 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 655 break; 656 } 657 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 658 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 659 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 660 /*IsInitializer=*/false); 661 } 662 } 663 664 /// Emit initialization of arrays of complex types. 665 /// \param DestAddr Address of the array. 666 /// \param Type Type of array. 
667 /// \param Init Initial expression of array. 668 /// \param SrcAddr Address of the original array. 669 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 670 QualType Type, bool EmitDeclareReductionInit, 671 const Expr *Init, 672 const OMPDeclareReductionDecl *DRD, 673 Address SrcAddr = Address::invalid()) { 674 // Perform element-by-element initialization. 675 QualType ElementTy; 676 677 // Drill down to the base element type on both arrays. 678 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 679 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 680 DestAddr = 681 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 682 if (DRD) 683 SrcAddr = 684 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 685 686 llvm::Value *SrcBegin = nullptr; 687 if (DRD) 688 SrcBegin = SrcAddr.getPointer(); 689 llvm::Value *DestBegin = DestAddr.getPointer(); 690 // Cast from pointer to array type to pointer to single element. 691 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 692 // The basic structure here is a while-do loop. 693 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 694 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 695 llvm::Value *IsEmpty = 696 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 697 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 698 699 // Enter the loop body, making that address the current address. 700 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 701 CGF.EmitBlock(BodyBB); 702 703 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 704 705 llvm::PHINode *SrcElementPHI = nullptr; 706 Address SrcElementCurrent = Address::invalid(); 707 if (DRD) { 708 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 709 "omp.arraycpy.srcElementPast"); 710 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 711 SrcElementCurrent = 712 Address(SrcElementPHI, 713 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 714 } 715 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 716 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 717 DestElementPHI->addIncoming(DestBegin, EntryBB); 718 Address DestElementCurrent = 719 Address(DestElementPHI, 720 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 721 722 // Emit copy. 723 { 724 CodeGenFunction::RunCleanupsScope InitScope(CGF); 725 if (EmitDeclareReductionInit) { 726 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 727 SrcElementCurrent, ElementTy); 728 } else 729 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 730 /*IsInitializer=*/false); 731 } 732 733 if (DRD) { 734 // Shift the address forward by one element. 735 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 736 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 737 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 738 } 739 740 // Shift the address forward by one element. 741 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 742 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 743 // Check whether we've reached the end. 744 llvm::Value *Done = 745 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 746 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 747 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 748 749 // Done. 
750 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 751 } 752 753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 754 return CGF.EmitOMPSharedLValue(E); 755 } 756 757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 758 const Expr *E) { 759 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 760 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 761 return LValue(); 762 } 763 764 void ReductionCodeGen::emitAggregateInitialization( 765 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 766 const OMPDeclareReductionDecl *DRD) { 767 // Emit VarDecl with copy init for arrays. 768 // Get the address of the original variable captured in current 769 // captured region. 770 const auto *PrivateVD = 771 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 772 bool EmitDeclareReductionInit = 773 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 774 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 775 EmitDeclareReductionInit, 776 EmitDeclareReductionInit ? ClausesData[N].ReductionOp 777 : PrivateVD->getInit(), 778 DRD, SharedLVal.getAddress(CGF)); 779 } 780 781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 782 ArrayRef<const Expr *> Origs, 783 ArrayRef<const Expr *> Privates, 784 ArrayRef<const Expr *> ReductionOps) { 785 ClausesData.reserve(Shareds.size()); 786 SharedAddresses.reserve(Shareds.size()); 787 Sizes.reserve(Shareds.size()); 788 BaseDecls.reserve(Shareds.size()); 789 const auto *IOrig = Origs.begin(); 790 const auto *IPriv = Privates.begin(); 791 const auto *IRed = ReductionOps.begin(); 792 for (const Expr *Ref : Shareds) { 793 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 794 std::advance(IOrig, 1); 795 std::advance(IPriv, 1); 796 std::advance(IRed, 1); 797 } 798 } 799 800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 801 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 802 "Number of generated lvalues must be exactly N."); 803 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 804 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 805 SharedAddresses.emplace_back(First, Second); 806 if (ClausesData[N].Shared == ClausesData[N].Ref) { 807 OrigAddresses.emplace_back(First, Second); 808 } else { 809 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 810 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 811 OrigAddresses.emplace_back(First, Second); 812 } 813 } 814 815 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 816 const auto *PrivateVD = 817 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 818 QualType PrivateType = PrivateVD->getType(); 819 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 820 if (!PrivateType->isVariablyModifiedType()) { 821 Sizes.emplace_back( 822 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 823 nullptr); 824 return; 825 } 826 llvm::Value *Size; 827 llvm::Value *SizeInChars; 828 auto *ElemType = 829 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 830 ->getElementType(); 831 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 832 if (AsArraySection) { 833 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 834 OrigAddresses[N].first.getPointer(CGF)); 835 Size = CGF.Builder.CreateNUWAdd( 836 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 837 SizeInChars = 
CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 838 } else { 839 SizeInChars = 840 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 841 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 842 } 843 Sizes.emplace_back(SizeInChars, Size); 844 CodeGenFunction::OpaqueValueMapping OpaqueMap( 845 CGF, 846 cast<OpaqueValueExpr>( 847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 848 RValue::get(Size)); 849 CGF.EmitVariablyModifiedType(PrivateType); 850 } 851 852 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 853 llvm::Value *Size) { 854 const auto *PrivateVD = 855 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 856 QualType PrivateType = PrivateVD->getType(); 857 if (!PrivateType->isVariablyModifiedType()) { 858 assert(!Size && !Sizes[N].second && 859 "Size should be nullptr for non-variably modified reduction " 860 "items."); 861 return; 862 } 863 CodeGenFunction::OpaqueValueMapping OpaqueMap( 864 CGF, 865 cast<OpaqueValueExpr>( 866 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 867 RValue::get(Size)); 868 CGF.EmitVariablyModifiedType(PrivateType); 869 } 870 871 void ReductionCodeGen::emitInitialization( 872 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 873 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 874 assert(SharedAddresses.size() > N && "No variable was generated"); 875 const auto *PrivateVD = 876 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 877 const OMPDeclareReductionDecl *DRD = 878 getReductionInit(ClausesData[N].ReductionOp); 879 QualType PrivateType = PrivateVD->getType(); 880 PrivateAddr = CGF.Builder.CreateElementBitCast( 881 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 882 QualType SharedType = SharedAddresses[N].first.getType(); 883 SharedLVal = CGF.MakeAddrLValue( 884 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 885 CGF.ConvertTypeForMem(SharedType)), 886 SharedType, SharedAddresses[N].first.getBaseInfo(), 887 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 888 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 889 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 890 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 891 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 892 PrivateAddr, SharedLVal.getAddress(CGF), 893 SharedLVal.getType()); 894 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 895 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 896 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 897 PrivateVD->getType().getQualifiers(), 898 /*IsInitializer=*/false); 899 } 900 } 901 902 bool ReductionCodeGen::needCleanups(unsigned N) { 903 const auto *PrivateVD = 904 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 905 QualType PrivateType = PrivateVD->getType(); 906 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 907 return DTorKind != QualType::DK_none; 908 } 909 910 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 911 Address PrivateAddr) { 912 const auto *PrivateVD = 913 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 914 QualType PrivateType = PrivateVD->getType(); 915 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 916 if (needCleanups(N)) { 917 PrivateAddr = CGF.Builder.CreateElementBitCast( 918 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 919 
CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 920 } 921 } 922 923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 924 LValue BaseLV) { 925 BaseTy = BaseTy.getNonReferenceType(); 926 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 927 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 928 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 929 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 930 } else { 931 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 932 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 933 } 934 BaseTy = BaseTy->getPointeeType(); 935 } 936 return CGF.MakeAddrLValue( 937 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 938 CGF.ConvertTypeForMem(ElTy)), 939 BaseLV.getType(), BaseLV.getBaseInfo(), 940 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 941 } 942 943 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 944 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 945 llvm::Value *Addr) { 946 Address Tmp = Address::invalid(); 947 Address TopTmp = Address::invalid(); 948 Address MostTopTmp = Address::invalid(); 949 BaseTy = BaseTy.getNonReferenceType(); 950 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 951 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 952 Tmp = CGF.CreateMemTemp(BaseTy); 953 if (TopTmp.isValid()) 954 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 955 else 956 MostTopTmp = Tmp; 957 TopTmp = Tmp; 958 BaseTy = BaseTy->getPointeeType(); 959 } 960 llvm::Type *Ty = BaseLVType; 961 if (Tmp.isValid()) 962 Ty = Tmp.getElementType(); 963 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 964 if (Tmp.isValid()) { 965 CGF.Builder.CreateStore(Addr, Tmp); 966 return MostTopTmp; 967 } 968 return Address(Addr, BaseLVAlignment); 969 } 970 971 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 972 const VarDecl *OrigVD = nullptr; 973 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 974 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 975 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 976 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 978 Base = TempASE->getBase()->IgnoreParenImpCasts(); 979 DE = cast<DeclRefExpr>(Base); 980 OrigVD = cast<VarDecl>(DE->getDecl()); 981 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 982 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 983 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 984 Base = TempASE->getBase()->IgnoreParenImpCasts(); 985 DE = cast<DeclRefExpr>(Base); 986 OrigVD = cast<VarDecl>(DE->getDecl()); 987 } 988 return OrigVD; 989 } 990 991 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 992 Address PrivateAddr) { 993 const DeclRefExpr *DE; 994 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 995 BaseDecls.emplace_back(OrigVD); 996 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 997 LValue BaseLValue = 998 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 999 OriginalBaseLValue); 1000 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1001 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1002 llvm::Value *PrivatePointer = 1003 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1004 PrivateAddr.getPointer(), 1005 SharedAddresses[N].first.getAddress(CGF).getType()); 1006 
llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1007 return castToBase(CGF, OrigVD->getType(), 1008 SharedAddresses[N].first.getType(), 1009 OriginalBaseLValue.getAddress(CGF).getType(), 1010 OriginalBaseLValue.getAlignment(), Ptr); 1011 } 1012 BaseDecls.emplace_back( 1013 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1014 return PrivateAddr; 1015 } 1016 1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1018 const OMPDeclareReductionDecl *DRD = 1019 getReductionInit(ClausesData[N].ReductionOp); 1020 return DRD && DRD->getInitializer(); 1021 } 1022 1023 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1024 return CGF.EmitLoadOfPointerLValue( 1025 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1026 getThreadIDVariable()->getType()->castAs<PointerType>()); 1027 } 1028 1029 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1030 if (!CGF.HaveInsertPoint()) 1031 return; 1032 // 1.2.2 OpenMP Language Terminology 1033 // Structured block - An executable statement with a single entry at the 1034 // top and a single exit at the bottom. 1035 // The point of exit cannot be a branch out of the structured block. 1036 // longjmp() and throw() must not violate the entry/exit criteria. 1037 CGF.EHStack.pushTerminate(); 1038 CodeGen(CGF); 1039 CGF.EHStack.popTerminate(); 1040 } 1041 1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1043 CodeGenFunction &CGF) { 1044 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1045 getThreadIDVariable()->getType(), 1046 AlignmentSource::Decl); 1047 } 1048 1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1050 QualType FieldTy) { 1051 auto *Field = FieldDecl::Create( 1052 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1053 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1054 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1055 Field->setAccess(AS_public); 1056 DC->addDecl(Field); 1057 return Field; 1058 } 1059 1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1061 StringRef Separator) 1062 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1063 OffloadEntriesInfoManager(CGM) { 1064 ASTContext &C = CGM.getContext(); 1065 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1066 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1067 RD->startDefinition(); 1068 // reserved_1 1069 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1070 // flags 1071 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1072 // reserved_2 1073 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1074 // reserved_3 1075 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1076 // psource 1077 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1078 RD->completeDefinition(); 1079 IdentQTy = C.getRecordType(RD); 1080 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1081 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1082 1083 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1084 llvm::omp::types::initializeTypes(CGM.getModule()); 1085 loadOffloadInfoMetadata(); 1086 } 1087 1088 void CGOpenMPRuntime::clear() { 1089 InternalVars.clear(); 1090 // Clean non-target variable declarations possibly used only in debug info. 
1091 for (const auto &Data : EmittedNonTargetVariables) { 1092 if (!Data.getValue().pointsToAliveValue()) 1093 continue; 1094 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1095 if (!GV) 1096 continue; 1097 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1098 continue; 1099 GV->eraseFromParent(); 1100 } 1101 } 1102 1103 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1104 SmallString<128> Buffer; 1105 llvm::raw_svector_ostream OS(Buffer); 1106 StringRef Sep = FirstSeparator; 1107 for (StringRef Part : Parts) { 1108 OS << Sep << Part; 1109 Sep = Separator; 1110 } 1111 return std::string(OS.str()); 1112 } 1113 1114 static llvm::Function * 1115 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1116 const Expr *CombinerInitializer, const VarDecl *In, 1117 const VarDecl *Out, bool IsCombiner) { 1118 // void .omp_combiner.(Ty *in, Ty *out); 1119 ASTContext &C = CGM.getContext(); 1120 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1121 FunctionArgList Args; 1122 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1123 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1124 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1125 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1126 Args.push_back(&OmpOutParm); 1127 Args.push_back(&OmpInParm); 1128 const CGFunctionInfo &FnInfo = 1129 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1130 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1131 std::string Name = CGM.getOpenMPRuntime().getName( 1132 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1133 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1134 Name, &CGM.getModule()); 1135 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1136 if (CGM.getLangOpts().Optimize) { 1137 Fn->removeFnAttr(llvm::Attribute::NoInline); 1138 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1139 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1140 } 1141 CodeGenFunction CGF(CGM); 1142 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1143 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1144 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1145 Out->getLocation()); 1146 CodeGenFunction::OMPPrivateScope Scope(CGF); 1147 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1148 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1149 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1150 .getAddress(CGF); 1151 }); 1152 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1153 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1154 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1155 .getAddress(CGF); 1156 }); 1157 (void)Scope.Privatize(); 1158 if (!IsCombiner && Out->hasInit() && 1159 !CGF.isTrivialInitializer(Out->getInit())) { 1160 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1161 Out->getType().getQualifiers(), 1162 /*IsInitializer=*/true); 1163 } 1164 if (CombinerInitializer) 1165 CGF.EmitIgnoredExpr(CombinerInitializer); 1166 Scope.ForceCleanup(); 1167 CGF.FinishFunction(); 1168 return Fn; 1169 } 1170 1171 void CGOpenMPRuntime::emitUserDefinedReduction( 1172 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1173 if (UDRMap.count(D) > 0) 1174 return; 1175 llvm::Function *Combiner = emitCombinerOrInitializer( 1176 CGM, D->getType(), D->getCombiner(), 1177 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1178 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1179 /*IsCombiner=*/true); 1180 llvm::Function *Initializer = nullptr; 1181 if (const Expr *Init = D->getInitializer()) { 1182 Initializer = emitCombinerOrInitializer( 1183 CGM, D->getType(), 1184 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1185 : nullptr, 1186 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1187 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1188 /*IsCombiner=*/false); 1189 } 1190 UDRMap.try_emplace(D, Combiner, Initializer); 1191 if (CGF) { 1192 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1193 Decls.second.push_back(D); 1194 } 1195 } 1196 1197 std::pair<llvm::Function *, llvm::Function *> 1198 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1199 auto I = UDRMap.find(D); 1200 if (I != UDRMap.end()) 1201 return I->second; 1202 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1203 return UDRMap.lookup(D); 1204 } 1205 1206 namespace { 1207 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1208 // Builder if one is present. 1209 struct PushAndPopStackRAII { 1210 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1211 bool HasCancel) 1212 : OMPBuilder(OMPBuilder) { 1213 if (!OMPBuilder) 1214 return; 1215 1216 // The following callback is the crucial part of clangs cleanup process. 1217 // 1218 // NOTE: 1219 // Once the OpenMPIRBuilder is used to create parallel regions (and 1220 // similar), the cancellation destination (Dest below) is determined via 1221 // IP. That means if we have variables to finalize we split the block at IP, 1222 // use the new block (=BB) as destination to build a JumpDest (via 1223 // getJumpDestInCurrentScope(BB)) which then is fed to 1224 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1225 // to push & pop an FinalizationInfo object. 1226 // The FiniCB will still be needed but at the point where the 1227 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
1228 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1229 assert(IP.getBlock()->end() == IP.getPoint() && 1230 "Clang CG should cause non-terminated block!"); 1231 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1232 CGF.Builder.restoreIP(IP); 1233 CodeGenFunction::JumpDest Dest = 1234 CGF.getOMPCancelDestination(OMPD_parallel); 1235 CGF.EmitBranchThroughCleanup(Dest); 1236 }; 1237 1238 // TODO: Remove this once we emit parallel regions through the 1239 // OpenMPIRBuilder as it can do this setup internally. 1240 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1241 {FiniCB, OMPD_parallel, HasCancel}); 1242 OMPBuilder->pushFinalizationCB(std::move(FI)); 1243 } 1244 ~PushAndPopStackRAII() { 1245 if (OMPBuilder) 1246 OMPBuilder->popFinalizationCB(); 1247 } 1248 llvm::OpenMPIRBuilder *OMPBuilder; 1249 }; 1250 } // namespace 1251 1252 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1253 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1254 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1255 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1256 assert(ThreadIDVar->getType()->isPointerType() && 1257 "thread id variable must be of type kmp_int32 *"); 1258 CodeGenFunction CGF(CGM, true); 1259 bool HasCancel = false; 1260 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1261 HasCancel = OPD->hasCancel(); 1262 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1263 HasCancel = OPD->hasCancel(); 1264 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1265 HasCancel = OPSD->hasCancel(); 1266 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1267 HasCancel = OPFD->hasCancel(); 1268 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1269 HasCancel = OPFD->hasCancel(); 1270 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1271 HasCancel = OPFD->hasCancel(); 1272 else if (const auto *OPFD = 1273 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1274 HasCancel = OPFD->hasCancel(); 1275 else if (const auto *OPFD = 1276 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1277 HasCancel = OPFD->hasCancel(); 1278 1279 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1280 // parallel region to make cancellation barriers work properly. 
1281 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1282 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1283 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1284 HasCancel, OutlinedHelperName); 1285 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1286 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1287 } 1288 1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1290 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1291 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1292 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1293 return emitParallelOrTeamsOutlinedFunction( 1294 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1295 } 1296 1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1298 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1299 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1300 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1301 return emitParallelOrTeamsOutlinedFunction( 1302 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1303 } 1304 1305 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1306 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1307 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1308 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1309 bool Tied, unsigned &NumberOfParts) { 1310 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1311 PrePostActionTy &) { 1312 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1313 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1314 llvm::Value *TaskArgs[] = { 1315 UpLoc, ThreadID, 1316 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1317 TaskTVar->getType()->castAs<PointerType>()) 1318 .getPointer(CGF)}; 1319 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 1320 CGM.getModule(), OMPRTL___kmpc_omp_task), 1321 TaskArgs); 1322 }; 1323 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1324 UntiedCodeGen); 1325 CodeGen.setAction(Action); 1326 assert(!ThreadIDVar->getType()->isPointerType() && 1327 "thread id variable must be of type kmp_int32 for tasks"); 1328 const OpenMPDirectiveKind Region = 1329 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? 
OMPD_taskloop 1330 : OMPD_task; 1331 const CapturedStmt *CS = D.getCapturedStmt(Region); 1332 bool HasCancel = false; 1333 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1334 HasCancel = TD->hasCancel(); 1335 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1336 HasCancel = TD->hasCancel(); 1337 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1338 HasCancel = TD->hasCancel(); 1339 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1340 HasCancel = TD->hasCancel(); 1341 1342 CodeGenFunction CGF(CGM, true); 1343 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1344 InnermostKind, HasCancel, Action); 1345 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1346 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1347 if (!Tied) 1348 NumberOfParts = Action.getNumberOfParts(); 1349 return Res; 1350 } 1351 1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1353 const RecordDecl *RD, const CGRecordLayout &RL, 1354 ArrayRef<llvm::Constant *> Data) { 1355 llvm::StructType *StructTy = RL.getLLVMType(); 1356 unsigned PrevIdx = 0; 1357 ConstantInitBuilder CIBuilder(CGM); 1358 auto DI = Data.begin(); 1359 for (const FieldDecl *FD : RD->fields()) { 1360 unsigned Idx = RL.getLLVMFieldNo(FD); 1361 // Fill the alignment. 1362 for (unsigned I = PrevIdx; I < Idx; ++I) 1363 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1364 PrevIdx = Idx + 1; 1365 Fields.add(*DI); 1366 ++DI; 1367 } 1368 } 1369 1370 template <class... As> 1371 static llvm::GlobalVariable * 1372 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1373 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1374 As &&... Args) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantInitBuilder CIBuilder(CGM); 1378 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1379 buildStructValue(Fields, CGM, RD, RL, Data); 1380 return Fields.finishAndCreateGlobal( 1381 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1382 std::forward<As>(Args)...); 1383 } 1384 1385 template <typename T> 1386 static void 1387 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1388 ArrayRef<llvm::Constant *> Data, 1389 T &Parent) { 1390 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1391 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1392 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1393 buildStructValue(Fields, CGM, RD, RL, Data); 1394 Fields.finishAndAddTo(Parent); 1395 } 1396 1397 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1398 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1399 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1400 FlagsTy FlagsKey(Flags, Reserved2Flags); 1401 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1402 if (!Entry) { 1403 if (!DefaultOpenMPPSource) { 1404 // Initialize default location for psource field of ident_t structure of 1405 // all ident_t objects. Format is ";file;function;line;column;;". 
1406 // Taken from 1407 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1408 DefaultOpenMPPSource = 1409 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1410 DefaultOpenMPPSource = 1411 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1412 } 1413 1414 llvm::Constant *Data[] = { 1415 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1416 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1417 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1418 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1419 llvm::GlobalValue *DefaultOpenMPLocation = 1420 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1421 llvm::GlobalValue::PrivateLinkage); 1422 DefaultOpenMPLocation->setUnnamedAddr( 1423 llvm::GlobalValue::UnnamedAddr::Global); 1424 1425 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1426 } 1427 return Address(Entry, Align); 1428 } 1429 1430 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1431 bool AtCurrentPoint) { 1432 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1433 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1434 1435 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1436 if (AtCurrentPoint) { 1437 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1438 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1439 } else { 1440 Elem.second.ServiceInsertPt = 1441 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1442 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1443 } 1444 } 1445 1446 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1447 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1448 if (Elem.second.ServiceInsertPt) { 1449 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1450 Elem.second.ServiceInsertPt = nullptr; 1451 Ptr->eraseFromParent(); 1452 } 1453 } 1454 1455 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1456 SourceLocation Loc, 1457 unsigned Flags) { 1458 Flags |= OMP_IDENT_KMPC; 1459 // If no debug info is generated - return global default location. 1460 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1461 Loc.isInvalid()) 1462 return getOrCreateDefaultLocation(Flags).getPointer(); 1463 1464 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1465 1466 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1467 Address LocValue = Address::invalid(); 1468 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1469 if (I != OpenMPLocThreadIDMap.end()) 1470 LocValue = Address(I->second.DebugLoc, Align); 1471 1472 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1473 // GetOpenMPThreadID was called before this routine. 
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1641 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1642 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1643 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1644 CGM.Int32Ty, // tid 1645 CGM.Int32Ty, // schedtype 1646 ITy, // lower 1647 ITy, // upper 1648 ITy, // stride 1649 ITy // chunk 1650 }; 1651 auto *FnTy = 1652 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1653 return CGM.CreateRuntimeFunction(FnTy, Name); 1654 } 1655 1656 llvm::FunctionCallee 1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1658 assert((IVSize == 32 || IVSize == 64) && 1659 "IV size is not compatible with the omp runtime"); 1660 StringRef Name = 1661 IVSize == 32 1662 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1663 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1664 llvm::Type *TypeParams[] = { 1665 getIdentTyPointerTy(), // loc 1666 CGM.Int32Ty, // tid 1667 }; 1668 auto *FnTy = 1669 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1670 return CGM.CreateRuntimeFunction(FnTy, Name); 1671 } 1672 1673 llvm::FunctionCallee 1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1675 assert((IVSize == 32 || IVSize == 64) && 1676 "IV size is not compatible with the omp runtime"); 1677 StringRef Name = 1678 IVSize == 32 1679 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1680 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1681 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1682 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1683 llvm::Type *TypeParams[] = { 1684 getIdentTyPointerTy(), // loc 1685 CGM.Int32Ty, // tid 1686 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1687 PtrTy, // p_lower 1688 PtrTy, // p_upper 1689 PtrTy // p_stride 1690 }; 1691 auto *FnTy = 1692 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1693 return CGM.CreateRuntimeFunction(FnTy, Name); 1694 } 1695 1696 /// Obtain information that uniquely identifies a target entry. This 1697 /// consists of the file and device IDs as well as line number associated with 1698 /// the relevant entry source location. 
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
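  // The cache is a zero-initialized common global of type void ** whose name
  // is the variable's mangled name plus a "cache" suffix (the exact separator
  // is target-dependent; see getName).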
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. Must be NULL:
    // reserved by the runtime, which currently requires this parameter to
    // always be NULL and fires an assertion otherwise.
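    // Null function pointers here and below mean "no hook": the runtime is
    // expected to skip constructor/destructor callbacks that are NULL.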
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know does not
  // conflict with any target region.
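  // The prefix built below has the shape
  //   __omp_offloading__<device-id-hex>_<file-id-hex>_<var-name>_l<line>,
  // e.g. "__omp_offloading__801_beef_myvar_l42" (values illustrative).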
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid").
// Otherwise, if we're not inside a parallel region but in regular serial code,
// get the thread ID by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
// stash this thread ID in a temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
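/// With Conditional=true, the region body is guarded by the result of the
/// enter call, conceptually (sketch only):
///   if (EnterCallee(EnterArgs)) { <region body>; ExitCallee(ExitArgs); }
/// See emitMasterRegion and emitSingleRegion for concrete uses.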
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
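    // The generated helper has the C-level signature
    //   void copy_func(void *dst_ptr_array, void *src_ptr_array);
    // where each argument points to an array of pointers to the copyprivate
    // variables (see emitCopyprivateCopyFunction above).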
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose the 'static, 1' schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
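    // i.e. behave as if the user had written 'schedule(static, 1)'.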
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only 'static' is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
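  // For example (illustrative): with OpenMP >= 5.0 and no explicit modifier,
  // plain 'schedule(dynamic)' picks up OMP_sch_modifier_nonmonotonic below,
  // while 'schedule(static)' leaves Modifier at 0.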
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
OMP_IDENT_WORK_LOOP 2870 : OMP_IDENT_WORK_SECTIONS), 2871 getThreadID(CGF, Loc)}; 2872 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2873 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2874 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2875 Args); 2876 } 2877 2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2879 SourceLocation Loc, 2880 unsigned IVSize, 2881 bool IVSigned) { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2885 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2886 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2887 } 2888 2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2890 SourceLocation Loc, unsigned IVSize, 2891 bool IVSigned, Address IL, 2892 Address LB, Address UB, 2893 Address ST) { 2894 // Call __kmpc_dispatch_next( 2895 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2896 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2897 // kmp_int[32|64] *p_stride); 2898 llvm::Value *Args[] = { 2899 emitUpdateLocation(CGF, Loc), 2900 getThreadID(CGF, Loc), 2901 IL.getPointer(), // &isLastIter 2902 LB.getPointer(), // &Lower 2903 UB.getPointer(), // &Upper 2904 ST.getPointer() // &Stride 2905 }; 2906 llvm::Value *Call = 2907 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2908 return CGF.EmitScalarConversion( 2909 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2910 CGF.getContext().BoolTy, Loc); 2911 } 2912 2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2914 llvm::Value *NumThreads, 2915 SourceLocation Loc) { 2916 if (!CGF.HaveInsertPoint()) 2917 return; 2918 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2919 llvm::Value *Args[] = { 2920 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2921 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2922 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2923 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2924 Args); 2925 } 2926 2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2928 ProcBindKind ProcBind, 2929 SourceLocation Loc) { 2930 if (!CGF.HaveInsertPoint()) 2931 return; 2932 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2933 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2934 llvm::Value *Args[] = { 2935 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2936 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2937 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2938 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2939 Args); 2940 } 2941 2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2943 SourceLocation Loc, llvm::AtomicOrdering AO) { 2944 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 2945 if (OMPBuilder) { 2946 OMPBuilder->CreateFlush(CGF.Builder); 2947 } else { 2948 if (!CGF.HaveInsertPoint()) 2949 return; 2950 // Build call void __kmpc_flush(ident_t *loc) 2951 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2952 CGM.getModule(), OMPRTL___kmpc_flush), 2953 emitUpdateLocation(CGF, Loc)); 2954 } 2955 } 2956 2957 namespace { 2958 /// Indexes of fields for type kmp_task_t. 2959 enum KmpTaskTFields { 2960 /// List of shared variables. 2961 KmpTaskTShareds, 2962 /// Task routine. 
2963 KmpTaskTRoutine, 2964 /// Partition id for the untied tasks. 2965 KmpTaskTPartId, 2966 /// Function with call of destructors for private variables. 2967 Data1, 2968 /// Task priority. 2969 Data2, 2970 /// (Taskloops only) Lower bound. 2971 KmpTaskTLowerBound, 2972 /// (Taskloops only) Upper bound. 2973 KmpTaskTUpperBound, 2974 /// (Taskloops only) Stride. 2975 KmpTaskTStride, 2976 /// (Taskloops only) Is last iteration flag. 2977 KmpTaskTLastIter, 2978 /// (Taskloops only) Reduction data. 2979 KmpTaskTReductions, 2980 }; 2981 } // anonymous namespace 2982 2983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2984 return OffloadEntriesTargetRegion.empty() && 2985 OffloadEntriesDeviceGlobalVar.empty(); 2986 } 2987 2988 /// Initialize target region entry. 2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2990 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2991 StringRef ParentName, unsigned LineNum, 2992 unsigned Order) { 2993 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2994 "only required for the device " 2995 "code generation."); 2996 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2997 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2998 OMPTargetRegionEntryTargetRegion); 2999 ++OffloadingEntriesNum; 3000 } 3001 3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3003 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3004 StringRef ParentName, unsigned LineNum, 3005 llvm::Constant *Addr, llvm::Constant *ID, 3006 OMPTargetRegionEntryKind Flags) { 3007 // If we are emitting code for a target, the entry is already initialized, 3008 // only has to be registered. 3009 if (CGM.getLangOpts().OpenMPIsDevice) { 3010 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3011 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3012 DiagnosticsEngine::Error, 3013 "Unable to find target region on line '%0' in the device code."); 3014 CGM.getDiags().Report(DiagID) << LineNum; 3015 return; 3016 } 3017 auto &Entry = 3018 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3019 assert(Entry.isValid() && "Entry not initialized!"); 3020 Entry.setAddress(Addr); 3021 Entry.setID(ID); 3022 Entry.setFlags(Flags); 3023 } else { 3024 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3025 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3026 ++OffloadingEntriesNum; 3027 } 3028 } 3029 3030 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3031 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3032 unsigned LineNum) const { 3033 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3034 if (PerDevice == OffloadEntriesTargetRegion.end()) 3035 return false; 3036 auto PerFile = PerDevice->second.find(FileID); 3037 if (PerFile == PerDevice->second.end()) 3038 return false; 3039 auto PerParentName = PerFile->second.find(ParentName); 3040 if (PerParentName == PerFile->second.end()) 3041 return false; 3042 auto PerLine = PerParentName->second.find(LineNum); 3043 if (PerLine == PerParentName->second.end()) 3044 return false; 3045 // Fail if this entry is already registered. 
3046 if (PerLine->second.getAddress() || PerLine->second.getID()) 3047 return false; 3048 return true; 3049 } 3050 3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3052 const OffloadTargetRegionEntryInfoActTy &Action) { 3053 // Scan all target region entries and perform the provided action. 3054 for (const auto &D : OffloadEntriesTargetRegion) 3055 for (const auto &F : D.second) 3056 for (const auto &P : F.second) 3057 for (const auto &L : P.second) 3058 Action(D.first, F.first, P.first(), L.first, L.second); 3059 } 3060 3061 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3062 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3063 OMPTargetGlobalVarEntryKind Flags, 3064 unsigned Order) { 3065 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3066 "only required for the device " 3067 "code generation."); 3068 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3069 ++OffloadingEntriesNum; 3070 } 3071 3072 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3073 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3074 CharUnits VarSize, 3075 OMPTargetGlobalVarEntryKind Flags, 3076 llvm::GlobalValue::LinkageTypes Linkage) { 3077 if (CGM.getLangOpts().OpenMPIsDevice) { 3078 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3079 assert(Entry.isValid() && Entry.getFlags() == Flags && 3080 "Entry not initialized!"); 3081 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3082 "Resetting with the new address."); 3083 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3084 if (Entry.getVarSize().isZero()) { 3085 Entry.setVarSize(VarSize); 3086 Entry.setLinkage(Linkage); 3087 } 3088 return; 3089 } 3090 Entry.setVarSize(VarSize); 3091 Entry.setLinkage(Linkage); 3092 Entry.setAddress(Addr); 3093 } else { 3094 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3095 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3096 assert(Entry.isValid() && Entry.getFlags() == Flags && 3097 "Entry not initialized!"); 3098 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3099 "Resetting with the new address."); 3100 if (Entry.getVarSize().isZero()) { 3101 Entry.setVarSize(VarSize); 3102 Entry.setLinkage(Linkage); 3103 } 3104 return; 3105 } 3106 OffloadEntriesDeviceGlobalVar.try_emplace( 3107 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3108 ++OffloadingEntriesNum; 3109 } 3110 } 3111 3112 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3113 actOnDeviceGlobalVarEntriesInfo( 3114 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3115 // Scan all target region entries and perform the provided action. 3116 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3117 Action(E.getKey(), E.getValue()); 3118 } 3119 3120 void CGOpenMPRuntime::createOffloadEntry( 3121 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3122 llvm::GlobalValue::LinkageTypes Linkage) { 3123 StringRef Name = Addr->getName(); 3124 llvm::Module &M = CGM.getModule(); 3125 llvm::LLVMContext &C = M.getContext(); 3126 3127 // Create constant string with the name. 
3128 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3129 3130 std::string StringName = getName({"omp_offloading", "entry_name"}); 3131 auto *Str = new llvm::GlobalVariable( 3132 M, StrPtrInit->getType(), /*isConstant=*/true, 3133 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3134 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3135 3136 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3137 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3138 llvm::ConstantInt::get(CGM.SizeTy, Size), 3139 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3140 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3141 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3142 llvm::GlobalVariable *Entry = createGlobalStruct( 3143 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3144 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3145 3146 // The entry has to be created in the section the linker expects it to be. 3147 Entry->setSection("omp_offloading_entries"); 3148 } 3149 3150 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3151 // Emit the offloading entries and metadata so that the device codegen side 3152 // can easily figure out what to emit. The produced metadata looks like 3153 // this: 3154 // 3155 // !omp_offload.info = !{!1, ...} 3156 // 3157 // Right now we only generate metadata for function that contain target 3158 // regions. 3159 3160 // If we are in simd mode or there are no entries, we don't need to do 3161 // anything. 3162 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3163 return; 3164 3165 llvm::Module &M = CGM.getModule(); 3166 llvm::LLVMContext &C = M.getContext(); 3167 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3168 SourceLocation, StringRef>, 3169 16> 3170 OrderedEntries(OffloadEntriesInfoManager.size()); 3171 llvm::SmallVector<StringRef, 16> ParentFunctions( 3172 OffloadEntriesInfoManager.size()); 3173 3174 // Auxiliary methods to create metadata values and strings. 3175 auto &&GetMDInt = [this](unsigned V) { 3176 return llvm::ConstantAsMetadata::get( 3177 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3178 }; 3179 3180 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3181 3182 // Create the offloading info metadata node. 3183 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3184 3185 // Create function that emits metadata for each target region entry; 3186 auto &&TargetRegionMetadataEmitter = 3187 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3188 &GetMDString]( 3189 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3190 unsigned Line, 3191 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3192 // Generate metadata for target regions. Each entry of this metadata 3193 // contains: 3194 // - Entry 0 -> Kind of this type of metadata (0). 3195 // - Entry 1 -> Device ID of the file where the entry was identified. 3196 // - Entry 2 -> File ID of the file where the entry was identified. 3197 // - Entry 3 -> Mangled name of the function where the entry was 3198 // identified. 3199 // - Entry 4 -> Line in the file where the entry was identified. 3200 // - Entry 5 -> Order the entry was created. 3201 // The first element of the metadata node is the kind. 
3202 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3203 GetMDInt(FileID), GetMDString(ParentName), 3204 GetMDInt(Line), GetMDInt(E.getOrder())}; 3205 3206 SourceLocation Loc; 3207 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3208 E = CGM.getContext().getSourceManager().fileinfo_end(); 3209 I != E; ++I) { 3210 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3211 I->getFirst()->getUniqueID().getFile() == FileID) { 3212 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3213 I->getFirst(), Line, 1); 3214 break; 3215 } 3216 } 3217 // Save this entry in the right position of the ordered entries array. 3218 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3219 ParentFunctions[E.getOrder()] = ParentName; 3220 3221 // Add metadata to the named metadata node. 3222 MD->addOperand(llvm::MDNode::get(C, Ops)); 3223 }; 3224 3225 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3226 TargetRegionMetadataEmitter); 3227 3228 // Create function that emits metadata for each device global variable entry; 3229 auto &&DeviceGlobalVarMetadataEmitter = 3230 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3231 MD](StringRef MangledName, 3232 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3233 &E) { 3234 // Generate metadata for global variables. Each entry of this metadata 3235 // contains: 3236 // - Entry 0 -> Kind of this type of metadata (1). 3237 // - Entry 1 -> Mangled name of the variable. 3238 // - Entry 2 -> Declare target kind. 3239 // - Entry 3 -> Order the entry was created. 3240 // The first element of the metadata node is the kind. 3241 llvm::Metadata *Ops[] = { 3242 GetMDInt(E.getKind()), GetMDString(MangledName), 3243 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3244 3245 // Save this entry in the right position of the ordered entries array. 3246 OrderedEntries[E.getOrder()] = 3247 std::make_tuple(&E, SourceLocation(), MangledName); 3248 3249 // Add metadata to the named metadata node. 3250 MD->addOperand(llvm::MDNode::get(C, Ops)); 3251 }; 3252 3253 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3254 DeviceGlobalVarMetadataEmitter); 3255 3256 for (const auto &E : OrderedEntries) { 3257 assert(std::get<0>(E) && "All ordered entries must exist!"); 3258 if (const auto *CE = 3259 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3260 std::get<0>(E))) { 3261 if (!CE->getID() || !CE->getAddress()) { 3262 // Do not blame the entry if the parent funtion is not emitted. 
3263 StringRef FnName = ParentFunctions[CE->getOrder()]; 3264 if (!CGM.GetGlobalValue(FnName)) 3265 continue; 3266 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3267 DiagnosticsEngine::Error, 3268 "Offloading entry for target region in %0 is incorrect: either the " 3269 "address or the ID is invalid."); 3270 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3271 continue; 3272 } 3273 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3274 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3275 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3276 OffloadEntryInfoDeviceGlobalVar>( 3277 std::get<0>(E))) { 3278 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3279 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3280 CE->getFlags()); 3281 switch (Flags) { 3282 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3283 if (CGM.getLangOpts().OpenMPIsDevice && 3284 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3285 continue; 3286 if (!CE->getAddress()) { 3287 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3288 DiagnosticsEngine::Error, "Offloading entry for declare target " 3289 "variable %0 is incorrect: the " 3290 "address is invalid."); 3291 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3292 continue; 3293 } 3294 // The vaiable has no definition - no need to add the entry. 3295 if (CE->getVarSize().isZero()) 3296 continue; 3297 break; 3298 } 3299 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3300 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3301 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3302 "Declaret target link address is set."); 3303 if (CGM.getLangOpts().OpenMPIsDevice) 3304 continue; 3305 if (!CE->getAddress()) { 3306 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3307 DiagnosticsEngine::Error, 3308 "Offloading entry for declare target variable is incorrect: the " 3309 "address is invalid."); 3310 CGM.getDiags().Report(DiagID); 3311 continue; 3312 } 3313 break; 3314 } 3315 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3316 CE->getVarSize().getQuantity(), Flags, 3317 CE->getLinkage()); 3318 } else { 3319 llvm_unreachable("Unsupported entry kind."); 3320 } 3321 } 3322 } 3323 3324 /// Loads all the offload entries information from the host IR 3325 /// metadata. 3326 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3327 // If we are in target mode, load the metadata from the host IR. This code has 3328 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3329 3330 if (!CGM.getLangOpts().OpenMPIsDevice) 3331 return; 3332 3333 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3334 return; 3335 3336 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3337 if (auto EC = Buf.getError()) { 3338 CGM.getDiags().Report(diag::err_cannot_open_file) 3339 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3340 return; 3341 } 3342 3343 llvm::LLVMContext C; 3344 auto ME = expectedToErrorOrAndEmitErrors( 3345 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3346 3347 if (auto EC = ME.getError()) { 3348 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3349 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3350 CGM.getDiags().Report(DiagID) 3351 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3352 return; 3353 } 3354 3355 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3356 if (!MD) 3357 return; 3358 3359 for (llvm::MDNode *MN : MD->operands()) { 3360 auto &&GetMDInt = [MN](unsigned Idx) { 3361 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3362 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3363 }; 3364 3365 auto &&GetMDString = [MN](unsigned Idx) { 3366 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3367 return V->getString(); 3368 }; 3369 3370 switch (GetMDInt(0)) { 3371 default: 3372 llvm_unreachable("Unexpected metadata!"); 3373 break; 3374 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3375 OffloadingEntryInfoTargetRegion: 3376 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3377 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3378 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3379 /*Order=*/GetMDInt(5)); 3380 break; 3381 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3382 OffloadingEntryInfoDeviceGlobalVar: 3383 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3384 /*MangledName=*/GetMDString(1), 3385 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3386 /*Flags=*/GetMDInt(2)), 3387 /*Order=*/GetMDInt(3)); 3388 break; 3389 } 3390 } 3391 } 3392 3393 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3394 if (!KmpRoutineEntryPtrTy) { 3395 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3396 ASTContext &C = CGM.getContext(); 3397 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3398 FunctionProtoType::ExtProtoInfo EPI; 3399 KmpRoutineEntryPtrQTy = C.getPointerType( 3400 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3401 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3402 } 3403 } 3404 3405 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3406 // Make sure the type of the entry is already created. This is the type we 3407 // have to create: 3408 // struct __tgt_offload_entry{ 3409 // void *addr; // Pointer to the offload entry info. 3410 // // (function or global) 3411 // char *name; // Name of the function or global. 3412 // size_t size; // Size of the entry info (0 if it a function). 3413 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3414 // int32_t reserved; // Reserved, to use by the runtime library. 
3415 // }; 3416 if (TgtOffloadEntryQTy.isNull()) { 3417 ASTContext &C = CGM.getContext(); 3418 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3419 RD->startDefinition(); 3420 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3421 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3422 addFieldToRecordDecl(C, RD, C.getSizeType()); 3423 addFieldToRecordDecl( 3424 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3425 addFieldToRecordDecl( 3426 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3427 RD->completeDefinition(); 3428 RD->addAttr(PackedAttr::CreateImplicit(C)); 3429 TgtOffloadEntryQTy = C.getRecordType(RD); 3430 } 3431 return TgtOffloadEntryQTy; 3432 } 3433 3434 namespace { 3435 struct PrivateHelpersTy { 3436 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3437 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3438 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3439 PrivateElemInit(PrivateElemInit) {} 3440 const Expr *OriginalRef = nullptr; 3441 const VarDecl *Original = nullptr; 3442 const VarDecl *PrivateCopy = nullptr; 3443 const VarDecl *PrivateElemInit = nullptr; 3444 }; 3445 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3446 } // anonymous namespace 3447 3448 static RecordDecl * 3449 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3450 if (!Privates.empty()) { 3451 ASTContext &C = CGM.getContext(); 3452 // Build struct .kmp_privates_t. { 3453 // /* private vars */ 3454 // }; 3455 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3456 RD->startDefinition(); 3457 for (const auto &Pair : Privates) { 3458 const VarDecl *VD = Pair.second.Original; 3459 QualType Type = VD->getType().getNonReferenceType(); 3460 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3461 if (VD->hasAttrs()) { 3462 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3463 E(VD->getAttrs().end()); 3464 I != E; ++I) 3465 FD->addAttr(*I); 3466 } 3467 } 3468 RD->completeDefinition(); 3469 return RD; 3470 } 3471 return nullptr; 3472 } 3473 3474 static RecordDecl * 3475 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3476 QualType KmpInt32Ty, 3477 QualType KmpRoutineEntryPointerQTy) { 3478 ASTContext &C = CGM.getContext(); 3479 // Build struct kmp_task_t { 3480 // void * shareds; 3481 // kmp_routine_entry_t routine; 3482 // kmp_int32 part_id; 3483 // kmp_cmplrdata_t data1; 3484 // kmp_cmplrdata_t data2; 3485 // For taskloops additional fields: 3486 // kmp_uint64 lb; 3487 // kmp_uint64 ub; 3488 // kmp_int64 st; 3489 // kmp_int32 liter; 3490 // void * reductions; 3491 // }; 3492 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3493 UD->startDefinition(); 3494 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3495 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3496 UD->completeDefinition(); 3497 QualType KmpCmplrdataTy = C.getRecordType(UD); 3498 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3499 RD->startDefinition(); 3500 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3501 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3502 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3503 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3504 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3505 if (isOpenMPTaskLoopDirective(Kind)) { 3506 QualType KmpUInt64Ty = 3507 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3508 QualType KmpInt64Ty = 3509 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3510 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3511 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3512 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3513 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3514 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3515 } 3516 RD->completeDefinition(); 3517 return RD; 3518 } 3519 3520 static RecordDecl * 3521 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3522 ArrayRef<PrivateDataTy> Privates) { 3523 ASTContext &C = CGM.getContext(); 3524 // Build struct kmp_task_t_with_privates { 3525 // kmp_task_t task_data; 3526 // .kmp_privates_t. privates; 3527 // }; 3528 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3529 RD->startDefinition(); 3530 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3531 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3532 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3533 RD->completeDefinition(); 3534 return RD; 3535 } 3536 3537 /// Emit a proxy function which accepts kmp_task_t as the second 3538 /// argument. 3539 /// \code 3540 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3541 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3542 /// For taskloops: 3543 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3544 /// tt->reductions, tt->shareds); 3545 /// return 0; 3546 /// } 3547 /// \endcode 3548 static llvm::Function * 3549 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3550 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3551 QualType KmpTaskTWithPrivatesPtrQTy, 3552 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3553 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3554 llvm::Value *TaskPrivatesMap) { 3555 ASTContext &C = CGM.getContext(); 3556 FunctionArgList Args; 3557 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3558 ImplicitParamDecl::Other); 3559 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3560 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3561 ImplicitParamDecl::Other); 3562 Args.push_back(&GtidArg); 3563 Args.push_back(&TaskTypeArg); 3564 const auto &TaskEntryFnInfo = 3565 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3566 llvm::FunctionType *TaskEntryTy = 3567 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3568 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3569 auto *TaskEntry = llvm::Function::Create( 3570 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3571 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3572 TaskEntry->setDoesNotRecurse(); 3573 CodeGenFunction CGF(CGM); 3574 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3575 Loc, Loc); 3576 3577 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3578 // tt, 3579 // For taskloops: 3580 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3581 // tt->task_data.shareds); 3582 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3583 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3584 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3585 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3586 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3587 const auto *KmpTaskTWithPrivatesQTyRD = 3588 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3589 LValue Base = 3590 
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3591 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3592 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3593 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3594 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3595 3596 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3597 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3598 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3599 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3600 CGF.ConvertTypeForMem(SharedsPtrTy)); 3601 3602 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3603 llvm::Value *PrivatesParam; 3604 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3605 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3606 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3607 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3608 } else { 3609 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3610 } 3611 3612 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3613 TaskPrivatesMap, 3614 CGF.Builder 3615 .CreatePointerBitCastOrAddrSpaceCast( 3616 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3617 .getPointer()}; 3618 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3619 std::end(CommonArgs)); 3620 if (isOpenMPTaskLoopDirective(Kind)) { 3621 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3622 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3623 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3624 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3625 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3626 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3627 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3628 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3629 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3630 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3631 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3632 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3633 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3634 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3635 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3636 CallArgs.push_back(LBParam); 3637 CallArgs.push_back(UBParam); 3638 CallArgs.push_back(StParam); 3639 CallArgs.push_back(LIParam); 3640 CallArgs.push_back(RParam); 3641 } 3642 CallArgs.push_back(SharedsParam); 3643 3644 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3645 CallArgs); 3646 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3647 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3648 CGF.FinishFunction(); 3649 return TaskEntry; 3650 } 3651 3652 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3653 SourceLocation Loc, 3654 QualType KmpInt32Ty, 3655 QualType KmpTaskTWithPrivatesPtrQTy, 3656 QualType KmpTaskTWithPrivatesQTy) { 3657 ASTContext &C = CGM.getContext(); 3658 FunctionArgList Args; 3659 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3660 ImplicitParamDecl::Other); 3661 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3662 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3663 ImplicitParamDecl::Other); 3664 
Args.push_back(&GtidArg); 3665 Args.push_back(&TaskTypeArg); 3666 const auto &DestructorFnInfo = 3667 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3668 llvm::FunctionType *DestructorFnTy = 3669 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3670 std::string Name = 3671 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3672 auto *DestructorFn = 3673 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3674 Name, &CGM.getModule()); 3675 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3676 DestructorFnInfo); 3677 DestructorFn->setDoesNotRecurse(); 3678 CodeGenFunction CGF(CGM); 3679 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3680 Args, Loc, Loc); 3681 3682 LValue Base = CGF.EmitLoadOfPointerLValue( 3683 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3684 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3685 const auto *KmpTaskTWithPrivatesQTyRD = 3686 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3687 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3688 Base = CGF.EmitLValueForField(Base, *FI); 3689 for (const auto *Field : 3690 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3691 if (QualType::DestructionKind DtorKind = 3692 Field->getType().isDestructedType()) { 3693 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3694 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3695 } 3696 } 3697 CGF.FinishFunction(); 3698 return DestructorFn; 3699 } 3700 3701 /// Emit a privates mapping function for correct handling of private and 3702 /// firstprivate variables. 3703 /// \code 3704 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3705 /// **noalias priv1,..., <tyn> **noalias privn) { 3706 /// *priv1 = &.privates.priv1; 3707 /// ...; 3708 /// *privn = &.privates.privn; 3709 /// } 3710 /// \endcode 3711 static llvm::Value * 3712 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3713 ArrayRef<const Expr *> PrivateVars, 3714 ArrayRef<const Expr *> FirstprivateVars, 3715 ArrayRef<const Expr *> LastprivateVars, 3716 QualType PrivatesQTy, 3717 ArrayRef<PrivateDataTy> Privates) { 3718 ASTContext &C = CGM.getContext(); 3719 FunctionArgList Args; 3720 ImplicitParamDecl TaskPrivatesArg( 3721 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3722 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3723 ImplicitParamDecl::Other); 3724 Args.push_back(&TaskPrivatesArg); 3725 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3726 unsigned Counter = 1; 3727 for (const Expr *E : PrivateVars) { 3728 Args.push_back(ImplicitParamDecl::Create( 3729 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3730 C.getPointerType(C.getPointerType(E->getType())) 3731 .withConst() 3732 .withRestrict(), 3733 ImplicitParamDecl::Other)); 3734 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3735 PrivateVarsPos[VD] = Counter; 3736 ++Counter; 3737 } 3738 for (const Expr *E : FirstprivateVars) { 3739 Args.push_back(ImplicitParamDecl::Create( 3740 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3741 C.getPointerType(C.getPointerType(E->getType())) 3742 .withConst() 3743 .withRestrict(), 3744 ImplicitParamDecl::Other)); 3745 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3746 PrivateVarsPos[VD] = Counter; 3747 ++Counter; 3748 } 3749 for (const Expr *E : LastprivateVars) { 3750 Args.push_back(ImplicitParamDecl::Create( 3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3752 
C.getPointerType(C.getPointerType(E->getType())) 3753 .withConst() 3754 .withRestrict(), 3755 ImplicitParamDecl::Other)); 3756 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3757 PrivateVarsPos[VD] = Counter; 3758 ++Counter; 3759 } 3760 const auto &TaskPrivatesMapFnInfo = 3761 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3762 llvm::FunctionType *TaskPrivatesMapTy = 3763 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3764 std::string Name = 3765 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3766 auto *TaskPrivatesMap = llvm::Function::Create( 3767 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3768 &CGM.getModule()); 3769 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3770 TaskPrivatesMapFnInfo); 3771 if (CGM.getLangOpts().Optimize) { 3772 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3773 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3774 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3775 } 3776 CodeGenFunction CGF(CGM); 3777 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3778 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3779 3780 // *privi = &.privates.privi; 3781 LValue Base = CGF.EmitLoadOfPointerLValue( 3782 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3783 TaskPrivatesArg.getType()->castAs<PointerType>()); 3784 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3785 Counter = 0; 3786 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3787 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3788 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3789 LValue RefLVal = 3790 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3791 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3792 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3793 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3794 ++Counter; 3795 } 3796 CGF.FinishFunction(); 3797 return TaskPrivatesMap; 3798 } 3799 3800 /// Emit initialization for private variables in task-based directives. 3801 static void emitPrivatesInit(CodeGenFunction &CGF, 3802 const OMPExecutableDirective &D, 3803 Address KmpTaskSharedsPtr, LValue TDBase, 3804 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3805 QualType SharedsTy, QualType SharedsPtrTy, 3806 const OMPTaskDataTy &Data, 3807 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3808 ASTContext &C = CGF.getContext(); 3809 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3810 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3811 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3812 ? OMPD_taskloop 3813 : OMPD_task; 3814 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3815 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3816 LValue SrcBase; 3817 bool IsTargetTask = 3818 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3819 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3820 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 3821 // PointersArray and SizesArray. The original variables for these arrays are 3822 // not captured and we get their addresses explicitly. 
3823 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3824 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3825 SrcBase = CGF.MakeAddrLValue( 3826 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3827 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3828 SharedsTy); 3829 } 3830 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3831 for (const PrivateDataTy &Pair : Privates) { 3832 const VarDecl *VD = Pair.second.PrivateCopy; 3833 const Expr *Init = VD->getAnyInitializer(); 3834 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3835 !CGF.isTrivialInitializer(Init)))) { 3836 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3837 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3838 const VarDecl *OriginalVD = Pair.second.Original; 3839 // Check if the variable is the target-based BasePointersArray, 3840 // PointersArray or SizesArray. 3841 LValue SharedRefLValue; 3842 QualType Type = PrivateLValue.getType(); 3843 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3844 if (IsTargetTask && !SharedField) { 3845 assert(isa<ImplicitParamDecl>(OriginalVD) && 3846 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3847 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3848 ->getNumParams() == 0 && 3849 isa<TranslationUnitDecl>( 3850 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3851 ->getDeclContext()) && 3852 "Expected artificial target data variable."); 3853 SharedRefLValue = 3854 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3855 } else if (ForDup) { 3856 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3857 SharedRefLValue = CGF.MakeAddrLValue( 3858 Address(SharedRefLValue.getPointer(CGF), 3859 C.getDeclAlign(OriginalVD)), 3860 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3861 SharedRefLValue.getTBAAInfo()); 3862 } else if (CGF.LambdaCaptureFields.count( 3863 Pair.second.Original->getCanonicalDecl()) > 0 || 3864 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3865 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3866 } else { 3867 // Processing for implicitly captured variables. 3868 InlinedOpenMPRegionRAII Region( 3869 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3870 /*HasCancel=*/false); 3871 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3872 } 3873 if (Type->isArrayType()) { 3874 // Initialize firstprivate array. 3875 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3876 // Perform simple memcpy. 3877 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3878 } else { 3879 // Initialize firstprivate array using element-by-element 3880 // initialization. 3881 CGF.EmitOMPAggregateAssign( 3882 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3883 Type, 3884 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3885 Address SrcElement) { 3886 // Clean up any temporaries needed by the initialization. 3887 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3888 InitScope.addPrivate( 3889 Elem, [SrcElement]() -> Address { return SrcElement; }); 3890 (void)InitScope.Privatize(); 3891 // Emit initialization for single element. 
3892 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3893 CGF, &CapturesInfo); 3894 CGF.EmitAnyExprToMem(Init, DestElement, 3895 Init->getType().getQualifiers(), 3896 /*IsInitializer=*/false); 3897 }); 3898 } 3899 } else { 3900 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3901 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3902 return SharedRefLValue.getAddress(CGF); 3903 }); 3904 (void)InitScope.Privatize(); 3905 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3906 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3907 /*capturedByInit=*/false); 3908 } 3909 } else { 3910 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3911 } 3912 } 3913 ++FI; 3914 } 3915 } 3916 3917 /// Check if duplication function is required for taskloops. 3918 static bool checkInitIsRequired(CodeGenFunction &CGF, 3919 ArrayRef<PrivateDataTy> Privates) { 3920 bool InitRequired = false; 3921 for (const PrivateDataTy &Pair : Privates) { 3922 const VarDecl *VD = Pair.second.PrivateCopy; 3923 const Expr *Init = VD->getAnyInitializer(); 3924 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3925 !CGF.isTrivialInitializer(Init)); 3926 if (InitRequired) 3927 break; 3928 } 3929 return InitRequired; 3930 } 3931 3932 3933 /// Emit task_dup function (for initialization of 3934 /// private/firstprivate/lastprivate vars and last_iter flag) 3935 /// \code 3936 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3937 /// lastpriv) { 3938 /// // setup lastprivate flag 3939 /// task_dst->last = lastpriv; 3940 /// // could be constructor calls here... 3941 /// } 3942 /// \endcode 3943 static llvm::Value * 3944 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3945 const OMPExecutableDirective &D, 3946 QualType KmpTaskTWithPrivatesPtrQTy, 3947 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3948 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3949 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3950 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3951 ASTContext &C = CGM.getContext(); 3952 FunctionArgList Args; 3953 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3954 KmpTaskTWithPrivatesPtrQTy, 3955 ImplicitParamDecl::Other); 3956 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3957 KmpTaskTWithPrivatesPtrQTy, 3958 ImplicitParamDecl::Other); 3959 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3960 ImplicitParamDecl::Other); 3961 Args.push_back(&DstArg); 3962 Args.push_back(&SrcArg); 3963 Args.push_back(&LastprivArg); 3964 const auto &TaskDupFnInfo = 3965 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3966 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3967 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3968 auto *TaskDup = llvm::Function::Create( 3969 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3970 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3971 TaskDup->setDoesNotRecurse(); 3972 CodeGenFunction CGF(CGM); 3973 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3974 Loc); 3975 3976 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3977 CGF.GetAddrOfLocalVar(&DstArg), 3978 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3979 // task_dst->liter = lastpriv; 3980 if (WithLastIter) { 3981 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3982 LValue Base = 
CGF.EmitLValueForField( 3983 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3984 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3985 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3986 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3987 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3988 } 3989 3990 // Emit initial values for private copies (if any). 3991 assert(!Privates.empty()); 3992 Address KmpTaskSharedsPtr = Address::invalid(); 3993 if (!Data.FirstprivateVars.empty()) { 3994 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3995 CGF.GetAddrOfLocalVar(&SrcArg), 3996 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3997 LValue Base = CGF.EmitLValueForField( 3998 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3999 KmpTaskSharedsPtr = Address( 4000 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4001 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4002 KmpTaskTShareds)), 4003 Loc), 4004 CGM.getNaturalTypeAlignment(SharedsTy)); 4005 } 4006 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4007 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4008 CGF.FinishFunction(); 4009 return TaskDup; 4010 } 4011 4012 /// Checks if destructor function is required to be generated. 4013 /// \return true if cleanups are required, false otherwise. 4014 static bool 4015 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4016 bool NeedsCleanup = false; 4017 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4018 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4019 for (const FieldDecl *FD : PrivateRD->fields()) { 4020 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4021 if (NeedsCleanup) 4022 break; 4023 } 4024 return NeedsCleanup; 4025 } 4026 4027 namespace { 4028 /// Loop generator for OpenMP iterator expression. 
4029 class OMPIteratorGeneratorScope final 4030 : public CodeGenFunction::OMPPrivateScope { 4031 CodeGenFunction &CGF; 4032 const OMPIteratorExpr *E = nullptr; 4033 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4034 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4035 OMPIteratorGeneratorScope() = delete; 4036 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4037 4038 public: 4039 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4040 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4041 if (!E) 4042 return; 4043 SmallVector<llvm::Value *, 4> Uppers; 4044 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4045 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4046 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4047 addPrivate(VD, [&CGF, VD]() { 4048 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4049 }); 4050 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4051 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4052 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4053 "counter.addr"); 4054 }); 4055 } 4056 Privatize(); 4057 4058 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4059 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4060 LValue CLVal = 4061 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4062 HelperData.CounterVD->getType()); 4063 // Counter = 0; 4064 CGF.EmitStoreOfScalar( 4065 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4066 CLVal); 4067 CodeGenFunction::JumpDest &ContDest = 4068 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4069 CodeGenFunction::JumpDest &ExitDest = 4070 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4071 // N = <number-of_iterations>; 4072 llvm::Value *N = Uppers[I]; 4073 // cont: 4074 // if (Counter < N) goto body; else goto exit; 4075 CGF.EmitBlock(ContDest.getBlock()); 4076 auto *CVal = 4077 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4078 llvm::Value *Cmp = 4079 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4080 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 4081 : CGF.Builder.CreateICmpULT(CVal, N); 4082 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4083 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4084 // body: 4085 CGF.EmitBlock(BodyBB); 4086 // Iteri = Begini + Counter * Stepi; 4087 CGF.EmitIgnoredExpr(HelperData.Update); 4088 } 4089 } 4090 ~OMPIteratorGeneratorScope() { 4091 if (!E) 4092 return; 4093 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4094 // Counter = Counter + 1; 4095 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4096 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4097 // goto cont; 4098 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4099 // exit: 4100 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4101 } 4102 } 4103 }; 4104 } // namespace 4105 4106 static std::pair<llvm::Value *, llvm::Value *> 4107 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4108 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4109 llvm::Value *Addr; 4110 if (OASE) { 4111 const Expr *Base = OASE->getBase(); 4112 Addr = CGF.EmitScalarExpr(Base); 4113 } else { 4114 Addr = CGF.EmitLValue(E).getPointer(CGF); 4115 } 4116 llvm::Value *SizeVal; 4117 QualType Ty = E->getType(); 4118 if (OASE) { 4119 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4120 for (const Expr *SE : OASE->getDimensions()) { 4121 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4122 Sz = CGF.EmitScalarConversion( 4123 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4124 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4125 } 4126 } else if (const auto *ASE = 4127 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4128 LValue UpAddrLVal = 4129 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4130 llvm::Value *UpAddr = 4131 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4132 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4133 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4134 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4135 } else { 4136 SizeVal = CGF.getTypeSize(Ty); 4137 } 4138 return std::make_pair(Addr, SizeVal); 4139 } 4140 4141 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4142 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4143 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4144 if (KmpTaskAffinityInfoTy.isNull()) { 4145 RecordDecl *KmpAffinityInfoRD = 4146 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4147 KmpAffinityInfoRD->startDefinition(); 4148 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4149 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4150 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4151 KmpAffinityInfoRD->completeDefinition(); 4152 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4153 } 4154 } 4155 4156 CGOpenMPRuntime::TaskResultTy 4157 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4158 const OMPExecutableDirective &D, 4159 llvm::Function *TaskFunction, QualType SharedsTy, 4160 Address Shareds, const OMPTaskDataTy &Data) { 4161 ASTContext &C = CGM.getContext(); 4162 llvm::SmallVector<PrivateDataTy, 4> Privates; 4163 // Aggregate privates and sort them by the alignment. 
4164 const auto *I = Data.PrivateCopies.begin(); 4165 for (const Expr *E : Data.PrivateVars) { 4166 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4167 Privates.emplace_back( 4168 C.getDeclAlign(VD), 4169 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4170 /*PrivateElemInit=*/nullptr)); 4171 ++I; 4172 } 4173 I = Data.FirstprivateCopies.begin(); 4174 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4175 for (const Expr *E : Data.FirstprivateVars) { 4176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4177 Privates.emplace_back( 4178 C.getDeclAlign(VD), 4179 PrivateHelpersTy( 4180 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4181 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4182 ++I; 4183 ++IElemInitRef; 4184 } 4185 I = Data.LastprivateCopies.begin(); 4186 for (const Expr *E : Data.LastprivateVars) { 4187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4188 Privates.emplace_back( 4189 C.getDeclAlign(VD), 4190 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4191 /*PrivateElemInit=*/nullptr)); 4192 ++I; 4193 } 4194 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4195 return L.first > R.first; 4196 }); 4197 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4198 // Build type kmp_routine_entry_t (if not built yet). 4199 emitKmpRoutineEntryT(KmpInt32Ty); 4200 // Build type kmp_task_t (if not built yet). 4201 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4202 if (SavedKmpTaskloopTQTy.isNull()) { 4203 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4204 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4205 } 4206 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4207 } else { 4208 assert((D.getDirectiveKind() == OMPD_task || 4209 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4210 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4211 "Expected taskloop, task or target directive"); 4212 if (SavedKmpTaskTQTy.isNull()) { 4213 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4214 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4215 } 4216 KmpTaskTQTy = SavedKmpTaskTQTy; 4217 } 4218 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4219 // Build particular struct kmp_task_t for the given task. 4220 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4221 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4222 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4223 QualType KmpTaskTWithPrivatesPtrQTy = 4224 C.getPointerType(KmpTaskTWithPrivatesQTy); 4225 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4226 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4227 KmpTaskTWithPrivatesTy->getPointerTo(); 4228 llvm::Value *KmpTaskTWithPrivatesTySize = 4229 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4230 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4231 4232 // Emit initial values for private copies (if any). 
4233 llvm::Value *TaskPrivatesMap = nullptr; 4234 llvm::Type *TaskPrivatesMapTy = 4235 std::next(TaskFunction->arg_begin(), 3)->getType(); 4236 if (!Privates.empty()) { 4237 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4238 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4239 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4240 FI->getType(), Privates); 4241 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4242 TaskPrivatesMap, TaskPrivatesMapTy); 4243 } else { 4244 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4245 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4246 } 4247 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4248 // kmp_task_t *tt); 4249 llvm::Function *TaskEntry = emitProxyTaskFunction( 4250 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4251 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4252 TaskPrivatesMap); 4253 4254 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4255 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4256 // kmp_routine_entry_t *task_entry); 4257 // Task flags. Format is taken from 4258 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4259 // description of kmp_tasking_flags struct. 4260 enum { 4261 TiedFlag = 0x1, 4262 FinalFlag = 0x2, 4263 DestructorsFlag = 0x8, 4264 PriorityFlag = 0x20, 4265 DetachableFlag = 0x40, 4266 }; 4267 unsigned Flags = Data.Tied ? TiedFlag : 0; 4268 bool NeedsCleanup = false; 4269 if (!Privates.empty()) { 4270 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4271 if (NeedsCleanup) 4272 Flags = Flags | DestructorsFlag; 4273 } 4274 if (Data.Priority.getInt()) 4275 Flags = Flags | PriorityFlag; 4276 if (D.hasClausesOfKind<OMPDetachClause>()) 4277 Flags = Flags | DetachableFlag; 4278 llvm::Value *TaskFlags = 4279 Data.Final.getPointer() 4280 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4281 CGF.Builder.getInt32(FinalFlag), 4282 CGF.Builder.getInt32(/*C=*/0)) 4283 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4284 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4285 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4286 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4287 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4288 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4289 TaskEntry, KmpRoutineEntryPtrTy)}; 4290 llvm::Value *NewTask; 4291 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4292 // Check if we have any device clause associated with the directive. 4293 const Expr *Device = nullptr; 4294 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4295 Device = C->getDevice(); 4296 // Emit device ID if any otherwise use default value. 4297 llvm::Value *DeviceID; 4298 if (Device) 4299 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4300 CGF.Int64Ty, /*isSigned=*/true); 4301 else 4302 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4303 AllocArgs.push_back(DeviceID); 4304 NewTask = CGF.EmitRuntimeCall( 4305 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4306 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4307 AllocArgs); 4308 } else { 4309 NewTask = 4310 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4311 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4312 AllocArgs); 4313 } 4314 // Emit detach clause initialization. 
4315 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4316 // task_descriptor); 4317 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4318 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4319 LValue EvtLVal = CGF.EmitLValue(Evt); 4320 4321 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4322 // int gtid, kmp_task_t *task); 4323 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4324 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4325 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4326 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4327 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4328 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4329 {Loc, Tid, NewTask}); 4330 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4331 Evt->getExprLoc()); 4332 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4333 } 4334 // Process affinity clauses. 4335 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4336 // Process list of affinity data. 4337 ASTContext &C = CGM.getContext(); 4338 Address AffinitiesArray = Address::invalid(); 4339 // Calculate number of elements to form the array of affinity data. 4340 llvm::Value *NumOfElements = nullptr; 4341 unsigned NumAffinities = 0; 4342 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4343 if (const Expr *Modifier = C->getModifier()) { 4344 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4345 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4346 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4347 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4348 NumOfElements = 4349 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4350 } 4351 } else { 4352 NumAffinities += C->varlist_size(); 4353 } 4354 } 4355 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4356 // Fields ids in kmp_task_affinity_info record. 4357 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4358 4359 QualType KmpTaskAffinityInfoArrayTy; 4360 if (NumOfElements) { 4361 NumOfElements = CGF.Builder.CreateNUWAdd( 4362 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4363 OpaqueValueExpr OVE( 4364 Loc, 4365 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4366 VK_RValue); 4367 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4368 RValue::get(NumOfElements)); 4369 KmpTaskAffinityInfoArrayTy = 4370 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4371 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4372 // Properly emit variable-sized array. 
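// The variably sized array is modeled as an implicit parameter
// declaration so that the normal local-variable emission path performs
// the dynamic stack allocation for it.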
4373 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4374 ImplicitParamDecl::Other); 4375 CGF.EmitVarDecl(*PD); 4376 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4377 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4378 /*isSigned=*/false); 4379 } else { 4380 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4381 KmpTaskAffinityInfoTy, 4382 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4383 ArrayType::Normal, /*IndexTypeQuals=*/0); 4384 AffinitiesArray = 4385 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4386 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4387 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4388 /*isSigned=*/false); 4389 } 4390 4391 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4392 // Fill array by elements without iterators. 4393 unsigned Pos = 0; 4394 bool HasIterator = false; 4395 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4396 if (C->getModifier()) { 4397 HasIterator = true; 4398 continue; 4399 } 4400 for (const Expr *E : C->varlists()) { 4401 llvm::Value *Addr; 4402 llvm::Value *Size; 4403 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4404 LValue Base = 4405 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4406 KmpTaskAffinityInfoTy); 4407 // affs[i].base_addr = &<Affinities[i].second>; 4408 LValue BaseAddrLVal = CGF.EmitLValueForField( 4409 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4410 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4411 BaseAddrLVal); 4412 // affs[i].len = sizeof(<Affinities[i].second>); 4413 LValue LenLVal = CGF.EmitLValueForField( 4414 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4415 CGF.EmitStoreOfScalar(Size, LenLVal); 4416 ++Pos; 4417 } 4418 } 4419 LValue PosLVal; 4420 if (HasIterator) { 4421 PosLVal = CGF.MakeAddrLValue( 4422 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4423 C.getSizeType()); 4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4425 } 4426 // Process elements with iterators. 
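// Affinity items guarded by an iterator modifier, e.g.
//   affinity(iterator(i = 0:n) : a[i])
// are appended after the plain items, using the position counter
// initialized above.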
4427 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4428 const Expr *Modifier = C->getModifier(); 4429 if (!Modifier) 4430 continue; 4431 OMPIteratorGeneratorScope IteratorScope( 4432 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4433 for (const Expr *E : C->varlists()) { 4434 llvm::Value *Addr; 4435 llvm::Value *Size; 4436 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4437 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4438 LValue Base = CGF.MakeAddrLValue( 4439 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4440 AffinitiesArray.getAlignment()), 4441 KmpTaskAffinityInfoTy); 4442 // affs[i].base_addr = &<Affinities[i].second>; 4443 LValue BaseAddrLVal = CGF.EmitLValueForField( 4444 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4445 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4446 BaseAddrLVal); 4447 // affs[i].len = sizeof(<Affinities[i].second>); 4448 LValue LenLVal = CGF.EmitLValueForField( 4449 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4450 CGF.EmitStoreOfScalar(Size, LenLVal); 4451 Idx = CGF.Builder.CreateNUWAdd( 4452 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4453 CGF.EmitStoreOfScalar(Idx, PosLVal); 4454 } 4455 } 4456 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4457 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4458 // naffins, kmp_task_affinity_info_t *affin_list); 4459 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4460 llvm::Value *GTid = getThreadID(CGF, Loc); 4461 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4462 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4463 // FIXME: Emit the function and ignore its result for now unless the 4464 // runtime function is properly implemented. 4465 (void)CGF.EmitRuntimeCall( 4466 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4467 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4468 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4469 } 4470 llvm::Value *NewTaskNewTaskTTy = 4471 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4472 NewTask, KmpTaskTWithPrivatesPtrTy); 4473 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4474 KmpTaskTWithPrivatesQTy); 4475 LValue TDBase = 4476 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4477 // Fill the data in the resulting kmp_task_t record. 4478 // Copy shareds if there are any. 4479 Address KmpTaskSharedsPtr = Address::invalid(); 4480 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4481 KmpTaskSharedsPtr = 4482 Address(CGF.EmitLoadOfScalar( 4483 CGF.EmitLValueForField( 4484 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4485 KmpTaskTShareds)), 4486 Loc), 4487 CGM.getNaturalTypeAlignment(SharedsTy)); 4488 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4489 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4490 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4491 } 4492 // Emit initial values for private copies (if any). 
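// For taskloop directives this may also require a task-duplication
// callback (emitted below) so that each generated task can redo
// firstprivate initialization and lastprivate bookkeeping.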
4493 TaskResultTy Result; 4494 if (!Privates.empty()) { 4495 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4496 SharedsTy, SharedsPtrTy, Data, Privates, 4497 /*ForDup=*/false); 4498 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4499 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4500 Result.TaskDupFn = emitTaskDupFunction( 4501 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4502 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4503 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4504 } 4505 } 4506 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4507 enum { Priority = 0, Destructors = 1 }; 4508 // Provide pointer to function with destructors for privates. 4509 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4510 const RecordDecl *KmpCmplrdataUD = 4511 (*FI)->getType()->getAsUnionType()->getDecl(); 4512 if (NeedsCleanup) { 4513 llvm::Value *DestructorFn = emitDestructorsFunction( 4514 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4515 KmpTaskTWithPrivatesQTy); 4516 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4517 LValue DestructorsLV = CGF.EmitLValueForField( 4518 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4519 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4520 DestructorFn, KmpRoutineEntryPtrTy), 4521 DestructorsLV); 4522 } 4523 // Set priority. 4524 if (Data.Priority.getInt()) { 4525 LValue Data2LV = CGF.EmitLValueForField( 4526 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4527 LValue PriorityLV = CGF.EmitLValueForField( 4528 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4529 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4530 } 4531 Result.NewTask = NewTask; 4532 Result.TaskEntry = TaskEntry; 4533 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4534 Result.TDBase = TDBase; 4535 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4536 return Result; 4537 } 4538 4539 namespace { 4540 /// Dependence kind for RTL. 4541 enum RTLDependenceKindTy { 4542 DepIn = 0x01, 4543 DepInOut = 0x3, 4544 DepMutexInOutSet = 0x4 4545 }; 4546 /// Fields ids in kmp_depend_info record. 4547 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4548 } // namespace 4549 4550 /// Translates internal dependency kind into the runtime kind. 4551 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4552 RTLDependenceKindTy DepKind; 4553 switch (K) { 4554 case OMPC_DEPEND_in: 4555 DepKind = DepIn; 4556 break; 4557 // Out and InOut dependencies must use the same code. 4558 case OMPC_DEPEND_out: 4559 case OMPC_DEPEND_inout: 4560 DepKind = DepInOut; 4561 break; 4562 case OMPC_DEPEND_mutexinoutset: 4563 DepKind = DepMutexInOutSet; 4564 break; 4565 case OMPC_DEPEND_source: 4566 case OMPC_DEPEND_sink: 4567 case OMPC_DEPEND_depobj: 4568 case OMPC_DEPEND_unknown: 4569 llvm_unreachable("Unknown task dependence type"); 4570 } 4571 return DepKind; 4572 } 4573 4574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
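/// The record mirrors the runtime's kmp_depend_info layout; a sketch:
///   struct kmp_depend_info { intptr_t base_addr; size_t len; <flags> flags; };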
4575 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4576 QualType &FlagsTy) { 4577 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4578 if (KmpDependInfoTy.isNull()) { 4579 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4580 KmpDependInfoRD->startDefinition(); 4581 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4582 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4583 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4584 KmpDependInfoRD->completeDefinition(); 4585 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4586 } 4587 } 4588 4589 std::pair<llvm::Value *, LValue> 4590 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4591 SourceLocation Loc) { 4592 ASTContext &C = CGM.getContext(); 4593 QualType FlagsTy; 4594 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4595 RecordDecl *KmpDependInfoRD = 4596 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4597 LValue Base = CGF.EmitLoadOfPointerLValue( 4598 DepobjLVal.getAddress(CGF), 4599 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4600 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4601 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4602 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4603 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4604 Base.getTBAAInfo()); 4605 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4606 Addr.getPointer(), 4607 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4608 LValue NumDepsBase = CGF.MakeAddrLValue( 4609 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4610 Base.getBaseInfo(), Base.getTBAAInfo()); 4611 // NumDeps = deps[i].base_addr; 4612 LValue BaseAddrLVal = CGF.EmitLValueForField( 4613 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4614 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4615 return std::make_pair(NumDeps, Base); 4616 } 4617 4618 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4619 llvm::PointerUnion<unsigned *, LValue *> Pos, 4620 const OMPTaskDataTy::DependData &Data, 4621 Address DependenciesArray) { 4622 CodeGenModule &CGM = CGF.CGM; 4623 ASTContext &C = CGM.getContext(); 4624 QualType FlagsTy; 4625 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4626 RecordDecl *KmpDependInfoRD = 4627 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4628 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4629 4630 OMPIteratorGeneratorScope IteratorScope( 4631 CGF, cast_or_null<OMPIteratorExpr>( 4632 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4633 : nullptr));
4634 for (const Expr *E : Data.DepExprs) {
4635 llvm::Value *Addr;
4636 llvm::Value *Size;
4637 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4638 LValue Base;
4639 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4640 Base = CGF.MakeAddrLValue(
4641 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4642 } else {
4643 LValue &PosLVal = *Pos.get<LValue *>();
4644 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4645 Base = CGF.MakeAddrLValue(
4646 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4647 DependenciesArray.getAlignment()),
4648 KmpDependInfoTy);
4649 }
4650 // deps[i].base_addr = &<Dependencies[i].second>;
4651 LValue BaseAddrLVal = CGF.EmitLValueForField(
4652 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4653 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4654 BaseAddrLVal);
4655 // deps[i].len = sizeof(<Dependencies[i].second>);
4656 LValue LenLVal = CGF.EmitLValueForField(
4657 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4658 CGF.EmitStoreOfScalar(Size, LenLVal);
4659 // deps[i].flags = <Dependencies[i].first>;
4660 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4661 LValue FlagsLVal = CGF.EmitLValueForField(
4662 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4663 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4664 FlagsLVal);
4665 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4666 ++(*P);
4667 } else {
4668 LValue &PosLVal = *Pos.get<LValue *>();
4669 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4670 Idx = CGF.Builder.CreateNUWAdd(Idx,
4671 llvm::ConstantInt::get(Idx->getType(), 1));
4672 CGF.EmitStoreOfScalar(Idx, PosLVal);
4673 }
4674 }
4675 }
4676
4677 static SmallVector<llvm::Value *, 4>
4678 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4679 const OMPTaskDataTy::DependData &Data) {
4680 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4681 "Expected depobj dependency kind.");
4682 SmallVector<llvm::Value *, 4> Sizes;
4683 SmallVector<LValue, 4> SizeLVals;
4684 ASTContext &C = CGF.getContext();
4685 QualType FlagsTy;
4686 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4687 RecordDecl *KmpDependInfoRD =
4688 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4689 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4690 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4691 {
4692 OMPIteratorGeneratorScope IteratorScope(
4693 CGF, cast_or_null<OMPIteratorExpr>(
4694 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
4695 : nullptr));
4696 for (const Expr *E : Data.DepExprs) {
4697 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4698 LValue Base = CGF.EmitLoadOfPointerLValue(
4699 DepobjLVal.getAddress(CGF),
4700 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4701 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4702 Base.getAddress(CGF), KmpDependInfoPtrT);
4703 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4704 Base.getTBAAInfo());
4705 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4706 Addr.getPointer(),
4707 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4708 LValue NumDepsBase = CGF.MakeAddrLValue(
4709 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4710 Base.getBaseInfo(), Base.getTBAAInfo());
4711 // NumDeps = deps[i].base_addr;
4712 LValue BaseAddrLVal = CGF.EmitLValueForField(
4713 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4714 llvm::Value *NumDeps =
4715 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4716 LValue NumLVal = CGF.MakeAddrLValue(
4717 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4718 C.getUIntPtrType());
4719 CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4720 llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4721 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4722 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4723 CGF.EmitStoreOfScalar(Add, NumLVal);
4724 SizeLVals.push_back(NumLVal);
4725 }
4726 }
4727 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4728 llvm::Value *Size =
4729 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4730 Sizes.push_back(Size);
4731 }
4732 return Sizes;
4733 }
4734
4735 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4736 LValue PosLVal,
4737 const OMPTaskDataTy::DependData &Data,
4738 Address DependenciesArray) {
4739 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4740 "Expected depobj dependency kind.");
4741 ASTContext &C = CGF.getContext();
4742 QualType FlagsTy;
4743 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4744 RecordDecl *KmpDependInfoRD =
4745 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4746 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4747 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4748 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4749 {
4750 OMPIteratorGeneratorScope IteratorScope(
4751 CGF, cast_or_null<OMPIteratorExpr>(
4752 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4753 : nullptr));
4754 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4755 const Expr *E = Data.DepExprs[I];
4756 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4757 LValue Base = CGF.EmitLoadOfPointerLValue(
4758 DepobjLVal.getAddress(CGF),
4759 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4760 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4761 Base.getAddress(CGF), KmpDependInfoPtrT);
4762 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4763 Base.getTBAAInfo());
4764
4765 // Get number of elements in a single depobj.
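// emitDepobjDependClause stores the element count in the slot just in
// front of the first payload element, so it is read back here through a
// GEP with index -1.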
4766 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4767 Addr.getPointer(),
4768 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4769 LValue NumDepsBase = CGF.MakeAddrLValue(
4770 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4771 Base.getBaseInfo(), Base.getTBAAInfo());
4772 // NumDeps = deps[i].base_addr;
4773 LValue BaseAddrLVal = CGF.EmitLValueForField(
4774 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4775 llvm::Value *NumDeps =
4776 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4777
4778 // Memcpy dependency data.
4779 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4780 ElSize,
4781 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4782 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4783 Address DepAddr =
4784 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4785 DependenciesArray.getAlignment());
4786 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4787
4788 // Increase pos.
4789 // pos += number of elements copied from this depobj;
4790 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4791 CGF.EmitStoreOfScalar(Add, PosLVal);
4792 }
4793 }
4794 }
4795
4796 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4797 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4798 SourceLocation Loc) {
4799 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4800 return D.DepExprs.empty();
4801 }))
4802 return std::make_pair(nullptr, Address::invalid());
4803 // Process list of dependencies.
4804 ASTContext &C = CGM.getContext();
4805 Address DependenciesArray = Address::invalid();
4806 llvm::Value *NumOfElements = nullptr;
4807 unsigned NumDependencies = std::accumulate(
4808 Dependencies.begin(), Dependencies.end(), 0,
4809 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4810 return D.DepKind == OMPC_DEPEND_depobj
4811 ? V
4812 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4813 });
4814 QualType FlagsTy;
4815 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4816 bool HasDepobjDeps = false;
4817 bool HasRegularWithIterators = false;
4818 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4819 llvm::Value *NumOfRegularWithIterators =
4820 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4821 // Calculate number of depobj dependencies and regular deps with the iterators.
4822 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4823 if (D.DepKind == OMPC_DEPEND_depobj) {
4824 SmallVector<llvm::Value *, 4> Sizes =
4825 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4826 for (llvm::Value *Size : Sizes) {
4827 NumOfDepobjElements =
4828 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4829 }
4830 HasDepobjDeps = true;
4831 continue;
4832 }
4833 // Include number of iterations, if any.
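// For a dependency under an iterator modifier, e.g.
//   depend(iterator(i = 0:n), in : a[i])
// the number of emitted dependencies is the product of the iterator
// upper bounds, computed at run time below.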
4834 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4835 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4836 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4837 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4838 NumOfRegularWithIterators =
4839 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4840 }
4841 HasRegularWithIterators = true;
4842 continue;
4843 }
4844 }
4845
4846 QualType KmpDependInfoArrayTy;
4847 if (HasDepobjDeps || HasRegularWithIterators) {
4848 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4849 /*isSigned=*/false);
4850 if (HasDepobjDeps) {
4851 NumOfElements =
4852 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4853 }
4854 if (HasRegularWithIterators) {
4855 NumOfElements =
4856 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4857 }
4858 OpaqueValueExpr OVE(Loc,
4859 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4860 VK_RValue);
4861 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4862 RValue::get(NumOfElements));
4863 KmpDependInfoArrayTy =
4864 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4865 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4866 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4867 // Properly emit variable-sized array.
4868 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4869 ImplicitParamDecl::Other);
4870 CGF.EmitVarDecl(*PD);
4871 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4872 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4873 /*isSigned=*/false);
4874 } else {
4875 KmpDependInfoArrayTy = C.getConstantArrayType(
4876 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4877 ArrayType::Normal, /*IndexTypeQuals=*/0);
4878 DependenciesArray =
4879 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4880 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4881 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4882 /*isSigned=*/false);
4883 }
4884 unsigned Pos = 0;
4885 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4886 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4887 Dependencies[I].IteratorExpr)
4888 continue;
4889 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4890 DependenciesArray);
4891 }
4892 // Copy regular dependencies with iterators.
4893 LValue PosLVal = CGF.MakeAddrLValue(
4894 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4895 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4896 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898 !Dependencies[I].IteratorExpr)
4899 continue;
4900 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4901 DependenciesArray);
4902 }
4903 // Copy final depobj arrays without iterators.
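// Their contents, sized via emitDepobjElementsSizes above, are copied
// wholesale from each depobj's backing array by emitDepobjElements.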
4904 if (HasDepobjDeps) { 4905 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4906 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4907 continue; 4908 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4909 DependenciesArray); 4910 } 4911 } 4912 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4913 DependenciesArray, CGF.VoidPtrTy); 4914 return std::make_pair(NumOfElements, DependenciesArray); 4915 } 4916 4917 Address CGOpenMPRuntime::emitDepobjDependClause( 4918 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4919 SourceLocation Loc) { 4920 if (Dependencies.DepExprs.empty()) 4921 return Address::invalid(); 4922 // Process list of dependencies. 4923 ASTContext &C = CGM.getContext(); 4924 Address DependenciesArray = Address::invalid(); 4925 unsigned NumDependencies = Dependencies.DepExprs.size(); 4926 QualType FlagsTy; 4927 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4928 RecordDecl *KmpDependInfoRD = 4929 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4930 4931 llvm::Value *Size; 4932 // Define type kmp_depend_info[<Dependencies.size()>]; 4933 // For depobj reserve one extra element to store the number of elements. 4934 // It is required to handle depobj(x) update(in) construct. 4935 // kmp_depend_info[<Dependencies.size()>] deps; 4936 llvm::Value *NumDepsVal; 4937 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4938 if (const auto *IE = 4939 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4940 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4941 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4942 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4943 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4944 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4945 } 4946 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4947 NumDepsVal); 4948 CharUnits SizeInBytes = 4949 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4950 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4951 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4952 NumDepsVal = 4953 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4954 } else { 4955 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4956 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4957 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4958 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4959 Size = CGM.getSize(Sz.alignTo(Align)); 4960 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4961 } 4962 // Need to allocate on the dynamic memory. 4963 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4964 // Use default allocator. 4965 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4966 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4967 4968 llvm::Value *Addr = 4969 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4970 CGM.getModule(), OMPRTL___kmpc_alloc), 4971 Args, ".dep.arr.addr"); 4972 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4973 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4974 DependenciesArray = Address(Addr, Align); 4975 // Write number of elements in the first element of array for depobj. 
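// Element 0 acts as a header: its base_addr field stores the count, and
// the pointer returned below is advanced past it so that clients see only
// the payload (getDepobjElements later recovers the count via index -1).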
4976 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4977 // deps[i].base_addr = NumDependencies;
4978 LValue BaseAddrLVal = CGF.EmitLValueForField(
4979 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4980 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4981 llvm::PointerUnion<unsigned *, LValue *> Pos;
4982 unsigned Idx = 1;
4983 LValue PosLVal;
4984 if (Dependencies.IteratorExpr) {
4985 PosLVal = CGF.MakeAddrLValue(
4986 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4987 C.getSizeType());
4988 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4989 /*IsInit=*/true);
4990 Pos = &PosLVal;
4991 } else {
4992 Pos = &Idx;
4993 }
4994 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4995 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4996 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4997 return DependenciesArray;
4998 }
4999
5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5001 SourceLocation Loc) {
5002 ASTContext &C = CGM.getContext();
5003 QualType FlagsTy;
5004 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5005 LValue Base = CGF.EmitLoadOfPointerLValue(
5006 DepobjLVal.getAddress(CGF),
5007 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5008 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5009 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5010 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5011 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5012 Addr.getPointer(),
5013 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5014 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5015 CGF.VoidPtrTy);
5016 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5017 // Use default allocator.
5018 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5019 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5020
5021 // __kmpc_free(gtid, addr, nullptr);
5022 (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
5023 CGM.getModule(), OMPRTL___kmpc_free),
5024 Args);
5025 }
5026
5027 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5028 OpenMPDependClauseKind NewDepKind,
5029 SourceLocation Loc) {
5030 ASTContext &C = CGM.getContext();
5031 QualType FlagsTy;
5032 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5033 RecordDecl *KmpDependInfoRD =
5034 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5035 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5036 llvm::Value *NumDeps;
5037 LValue Base;
5038 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5039
5040 Address Begin = Base.getAddress(CGF);
5041 // Cast from pointer to array type to pointer to single element.
5042 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5043 // The basic structure here is a while-do loop.
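// Conceptually (a sketch):
//   el = begin;
//   do { el->flags = new_kind; ++el; } while (el != end);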
5044 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5045 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5046 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5047 CGF.EmitBlock(BodyBB); 5048 llvm::PHINode *ElementPHI = 5049 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5050 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5051 Begin = Address(ElementPHI, Begin.getAlignment()); 5052 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5053 Base.getTBAAInfo()); 5054 // deps[i].flags = NewDepKind; 5055 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5056 LValue FlagsLVal = CGF.EmitLValueForField( 5057 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5058 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5059 FlagsLVal); 5060 5061 // Shift the address forward by one element. 5062 Address ElementNext = 5063 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5064 ElementPHI->addIncoming(ElementNext.getPointer(), 5065 CGF.Builder.GetInsertBlock()); 5066 llvm::Value *IsEmpty = 5067 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5068 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5069 // Done. 5070 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5071 } 5072 5073 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5074 const OMPExecutableDirective &D, 5075 llvm::Function *TaskFunction, 5076 QualType SharedsTy, Address Shareds, 5077 const Expr *IfCond, 5078 const OMPTaskDataTy &Data) { 5079 if (!CGF.HaveInsertPoint()) 5080 return; 5081 5082 TaskResultTy Result = 5083 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5084 llvm::Value *NewTask = Result.NewTask; 5085 llvm::Function *TaskEntry = Result.TaskEntry; 5086 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5087 LValue TDBase = Result.TDBase; 5088 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5089 // Process list of dependences. 5090 Address DependenciesArray = Address::invalid(); 5091 llvm::Value *NumOfElements; 5092 std::tie(NumOfElements, DependenciesArray) = 5093 emitDependClause(CGF, Data.Dependences, Loc); 5094 5095 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5096 // libcall. 
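// With an 'if' clause the task is either enqueued normally (then branch)
// or executed immediately as an undeferred task bracketed by
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 (else branch).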
5097 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5098 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5099 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5100 // list is not empty 5101 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5102 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5103 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5104 llvm::Value *DepTaskArgs[7]; 5105 if (!Data.Dependences.empty()) { 5106 DepTaskArgs[0] = UpLoc; 5107 DepTaskArgs[1] = ThreadID; 5108 DepTaskArgs[2] = NewTask; 5109 DepTaskArgs[3] = NumOfElements; 5110 DepTaskArgs[4] = DependenciesArray.getPointer(); 5111 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5112 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5113 } 5114 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5115 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5116 if (!Data.Tied) { 5117 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5118 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5119 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5120 } 5121 if (!Data.Dependences.empty()) { 5122 CGF.EmitRuntimeCall( 5123 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5124 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5125 DepTaskArgs); 5126 } else { 5127 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5128 CGM.getModule(), OMPRTL___kmpc_omp_task), 5129 TaskArgs); 5130 } 5131 // Check if parent region is untied and build return for untied task; 5132 if (auto *Region = 5133 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5134 Region->emitUntiedSwitch(CGF); 5135 }; 5136 5137 llvm::Value *DepWaitTaskArgs[6]; 5138 if (!Data.Dependences.empty()) { 5139 DepWaitTaskArgs[0] = UpLoc; 5140 DepWaitTaskArgs[1] = ThreadID; 5141 DepWaitTaskArgs[2] = NumOfElements; 5142 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5143 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5144 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5145 } 5146 auto &M = CGM.getModule(); 5147 auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5148 &Data, &DepWaitTaskArgs, 5149 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5150 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5151 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5152 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5153 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5154 // is specified. 
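// In this undeferred path the encountering thread first waits for the
// task's dependences itself and then runs the task body inline.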
5155 if (!Data.Dependences.empty()) 5156 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5157 M, OMPRTL___kmpc_omp_wait_deps), 5158 DepWaitTaskArgs); 5159 // Call proxy_task_entry(gtid, new_task); 5160 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5161 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5162 Action.Enter(CGF); 5163 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5164 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5165 OutlinedFnArgs); 5166 }; 5167 5168 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5169 // kmp_task_t *new_task); 5170 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5171 // kmp_task_t *new_task); 5172 RegionCodeGenTy RCG(CodeGen); 5173 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5174 M, OMPRTL___kmpc_omp_task_begin_if0), 5175 TaskArgs, 5176 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5177 M, OMPRTL___kmpc_omp_task_complete_if0), 5178 TaskArgs); 5179 RCG.setAction(Action); 5180 RCG(CGF); 5181 }; 5182 5183 if (IfCond) { 5184 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5185 } else { 5186 RegionCodeGenTy ThenRCG(ThenCodeGen); 5187 ThenRCG(CGF); 5188 } 5189 } 5190 5191 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5192 const OMPLoopDirective &D, 5193 llvm::Function *TaskFunction, 5194 QualType SharedsTy, Address Shareds, 5195 const Expr *IfCond, 5196 const OMPTaskDataTy &Data) { 5197 if (!CGF.HaveInsertPoint()) 5198 return; 5199 TaskResultTy Result = 5200 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5201 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5202 // libcall. 5203 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5204 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5205 // sched, kmp_uint64 grainsize, void *task_dup); 5206 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5207 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5208 llvm::Value *IfVal; 5209 if (IfCond) { 5210 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5211 /*isSigned=*/true); 5212 } else { 5213 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5214 } 5215 5216 LValue LBLVal = CGF.EmitLValueForField( 5217 Result.TDBase, 5218 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5219 const auto *LBVar = 5220 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5221 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5222 LBLVal.getQuals(), 5223 /*IsInitializer=*/true); 5224 LValue UBLVal = CGF.EmitLValueForField( 5225 Result.TDBase, 5226 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5227 const auto *UBVar = 5228 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5229 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5230 UBLVal.getQuals(), 5231 /*IsInitializer=*/true); 5232 LValue StLVal = CGF.EmitLValueForField( 5233 Result.TDBase, 5234 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5235 const auto *StVar = 5236 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5237 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5238 StLVal.getQuals(), 5239 /*IsInitializer=*/true); 5240 // Store reductions address. 
5241 LValue RedLVal = CGF.EmitLValueForField( 5242 Result.TDBase, 5243 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5244 if (Data.Reductions) { 5245 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5246 } else { 5247 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5248 CGF.getContext().VoidPtrTy); 5249 } 5250 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5251 llvm::Value *TaskArgs[] = { 5252 UpLoc, 5253 ThreadID, 5254 Result.NewTask, 5255 IfVal, 5256 LBLVal.getPointer(CGF), 5257 UBLVal.getPointer(CGF), 5258 CGF.EmitLoadOfScalar(StLVal, Loc), 5259 llvm::ConstantInt::getSigned( 5260 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5261 llvm::ConstantInt::getSigned( 5262 CGF.IntTy, Data.Schedule.getPointer() 5263 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5264 : NoSchedule), 5265 Data.Schedule.getPointer() 5266 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5267 /*isSigned=*/false) 5268 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5269 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5270 Result.TaskDupFn, CGF.VoidPtrTy) 5271 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5272 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5273 CGM.getModule(), OMPRTL___kmpc_taskloop), 5274 TaskArgs); 5275 } 5276 5277 /// Emit reduction operation for each element of array (required for 5278 /// array sections) LHS op = RHS. 5279 /// \param Type Type of array. 5280 /// \param LHSVar Variable on the left side of the reduction operation 5281 /// (references element of array in original variable). 5282 /// \param RHSVar Variable on the right side of the reduction operation 5283 /// (references element of array in original variable). 5284 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5285 /// RHSVar. 5286 static void EmitOMPAggregateReduction( 5287 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5288 const VarDecl *RHSVar, 5289 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5290 const Expr *, const Expr *)> &RedOpGen, 5291 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5292 const Expr *UpExpr = nullptr) { 5293 // Perform element-by-element initialization. 5294 QualType ElementTy; 5295 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5296 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5297 5298 // Drill down to the base element type on both arrays. 5299 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5300 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5301 5302 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5303 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5304 // Cast from pointer to array type to pointer to single element. 5305 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5306 // The basic structure here is a while-do loop. 5307 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5308 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5309 llvm::Value *IsEmpty = 5310 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5311 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5312 5313 // Enter the loop body, making that address the current address. 
5314 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5315 CGF.EmitBlock(BodyBB); 5316 5317 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5318 5319 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5320 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5321 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5322 Address RHSElementCurrent = 5323 Address(RHSElementPHI, 5324 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5325 5326 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5327 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5328 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5329 Address LHSElementCurrent = 5330 Address(LHSElementPHI, 5331 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5332 5333 // Emit copy. 5334 CodeGenFunction::OMPPrivateScope Scope(CGF); 5335 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5336 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5337 Scope.Privatize(); 5338 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5339 Scope.ForceCleanup(); 5340 5341 // Shift the address forward by one element. 5342 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5343 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5344 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5345 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5346 // Check whether we've reached the end. 5347 llvm::Value *Done = 5348 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5349 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5350 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5351 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5352 5353 // Done. 5354 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5355 } 5356 5357 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5359 /// UDR combiner function. 
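/// For instance, for a user-defined reduction such as
///   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
/// the combiner is emitted as a call to the function generated for that
/// declaration.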
5360 static void emitReductionCombiner(CodeGenFunction &CGF, 5361 const Expr *ReductionOp) { 5362 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5363 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5364 if (const auto *DRE = 5365 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5366 if (const auto *DRD = 5367 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5368 std::pair<llvm::Function *, llvm::Function *> Reduction = 5369 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5370 RValue Func = RValue::get(Reduction.first); 5371 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5372 CGF.EmitIgnoredExpr(ReductionOp); 5373 return; 5374 } 5375 CGF.EmitIgnoredExpr(ReductionOp); 5376 } 5377 5378 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5379 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5380 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5381 ArrayRef<const Expr *> ReductionOps) { 5382 ASTContext &C = CGM.getContext(); 5383 5384 // void reduction_func(void *LHSArg, void *RHSArg); 5385 FunctionArgList Args; 5386 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5387 ImplicitParamDecl::Other); 5388 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5389 ImplicitParamDecl::Other); 5390 Args.push_back(&LHSArg); 5391 Args.push_back(&RHSArg); 5392 const auto &CGFI = 5393 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5394 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5396 llvm::GlobalValue::InternalLinkage, Name, 5397 &CGM.getModule()); 5398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5399 Fn->setDoesNotRecurse(); 5400 CodeGenFunction CGF(CGM); 5401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5402 5403 // Dst = (void*[n])(LHSArg); 5404 // Src = (void*[n])(RHSArg); 5405 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5406 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5407 ArgsType), CGF.getPointerAlign()); 5408 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5410 ArgsType), CGF.getPointerAlign()); 5411 5412 // ... 5413 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5414 // ... 5415 CodeGenFunction::OMPPrivateScope Scope(CGF); 5416 auto IPriv = Privates.begin(); 5417 unsigned Idx = 0; 5418 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5419 const auto *RHSVar = 5420 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5421 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5422 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5423 }); 5424 const auto *LHSVar = 5425 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5426 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5427 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5428 }); 5429 QualType PrivTy = (*IPriv)->getType(); 5430 if (PrivTy->isVariablyModifiedType()) { 5431 // Get array size and emit VLA type. 
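// For variably modified privates the reduction list carries an extra
// slot holding the element count; it is loaded here and bound to the VLA
// size expression before the private type is re-emitted.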
5432 ++Idx; 5433 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5434 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5435 const VariableArrayType *VLA = 5436 CGF.getContext().getAsVariableArrayType(PrivTy); 5437 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5438 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5439 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5440 CGF.EmitVariablyModifiedType(PrivTy); 5441 } 5442 } 5443 Scope.Privatize(); 5444 IPriv = Privates.begin(); 5445 auto ILHS = LHSExprs.begin(); 5446 auto IRHS = RHSExprs.begin(); 5447 for (const Expr *E : ReductionOps) { 5448 if ((*IPriv)->getType()->isArrayType()) { 5449 // Emit reduction for array section. 5450 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5451 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5452 EmitOMPAggregateReduction( 5453 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5454 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5455 emitReductionCombiner(CGF, E); 5456 }); 5457 } else { 5458 // Emit reduction for array subscript or single variable. 5459 emitReductionCombiner(CGF, E); 5460 } 5461 ++IPriv; 5462 ++ILHS; 5463 ++IRHS; 5464 } 5465 Scope.ForceCleanup(); 5466 CGF.FinishFunction(); 5467 return Fn; 5468 } 5469 5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5471 const Expr *ReductionOp, 5472 const Expr *PrivateRef, 5473 const DeclRefExpr *LHS, 5474 const DeclRefExpr *RHS) { 5475 if (PrivateRef->getType()->isArrayType()) { 5476 // Emit reduction for array section. 5477 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5478 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5479 EmitOMPAggregateReduction( 5480 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5481 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5482 emitReductionCombiner(CGF, ReductionOp); 5483 }); 5484 } else { 5485 // Emit reduction for array subscript or single variable. 5486 emitReductionCombiner(CGF, ReductionOp); 5487 } 5488 } 5489 5490 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5491 ArrayRef<const Expr *> Privates, 5492 ArrayRef<const Expr *> LHSExprs, 5493 ArrayRef<const Expr *> RHSExprs, 5494 ArrayRef<const Expr *> ReductionOps, 5495 ReductionOptionsTy Options) { 5496 if (!CGF.HaveInsertPoint()) 5497 return; 5498 5499 bool WithNowait = Options.WithNowait; 5500 bool SimpleReduction = Options.SimpleReduction; 5501 5502 // Next code should be emitted for reduction: 5503 // 5504 // static kmp_critical_name lock = { 0 }; 5505 // 5506 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5507 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5508 // ... 5509 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5510 // *(Type<n>-1*)rhs[<n>-1]); 5511 // } 5512 // 5513 // ... 5514 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5515 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5516 // RedList, reduce_func, &<lock>)) { 5517 // case 1: 5518 // ... 5519 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5520 // ... 5521 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5522 // break; 5523 // case 2: 5524 // ... 5525 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5526 // ... 
5527 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5528 // break; 5529 // default:; 5530 // } 5531 // 5532 // if SimpleReduction is true, only the next code is generated: 5533 // ... 5534 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5535 // ... 5536 5537 ASTContext &C = CGM.getContext(); 5538 5539 if (SimpleReduction) { 5540 CodeGenFunction::RunCleanupsScope Scope(CGF); 5541 auto IPriv = Privates.begin(); 5542 auto ILHS = LHSExprs.begin(); 5543 auto IRHS = RHSExprs.begin(); 5544 for (const Expr *E : ReductionOps) { 5545 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5546 cast<DeclRefExpr>(*IRHS)); 5547 ++IPriv; 5548 ++ILHS; 5549 ++IRHS; 5550 } 5551 return; 5552 } 5553 5554 // 1. Build a list of reduction variables. 5555 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5556 auto Size = RHSExprs.size(); 5557 for (const Expr *E : Privates) { 5558 if (E->getType()->isVariablyModifiedType()) 5559 // Reserve place for array size. 5560 ++Size; 5561 } 5562 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5563 QualType ReductionArrayTy = 5564 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5565 /*IndexTypeQuals=*/0); 5566 Address ReductionList = 5567 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5568 auto IPriv = Privates.begin(); 5569 unsigned Idx = 0; 5570 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5571 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5572 CGF.Builder.CreateStore( 5573 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5574 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5575 Elem); 5576 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5577 // Store array size. 5578 ++Idx; 5579 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5580 llvm::Value *Size = CGF.Builder.CreateIntCast( 5581 CGF.getVLASize( 5582 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5583 .NumElts, 5584 CGF.SizeTy, /*isSigned=*/false); 5585 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5586 Elem); 5587 } 5588 } 5589 5590 // 2. Emit reduce_func(). 5591 llvm::Function *ReductionFn = emitReductionFunction( 5592 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5593 LHSExprs, RHSExprs, ReductionOps); 5594 5595 // 3. Create static kmp_critical_name lock = { 0 }; 5596 std::string Name = getName({"reduction"}); 5597 llvm::Value *Lock = getCriticalRegionLock(Name); 5598 5599 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5600 // RedList, reduce_func, &<lock>); 5601 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5602 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5603 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5604 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5605 ReductionList.getPointer(), CGF.VoidPtrTy); 5606 llvm::Value *Args[] = { 5607 IdentTLoc, // ident_t *<loc> 5608 ThreadId, // i32 <gtid> 5609 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5610 ReductionArrayTySize, // size_type sizeof(RedList) 5611 RL, // void *RedList 5612 ReductionFn, // void (*) (void *, void *) <reduce_func> 5613 Lock // kmp_critical_name *&<lock> 5614 }; 5615 llvm::Value *Res = CGF.EmitRuntimeCall( 5616 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5617 CGM.getModule(), 5618 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5619 Args); 5620 5621 // 5. 
Build switch(res) 5622 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5623 llvm::SwitchInst *SwInst = 5624 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5625 5626 // 6. Build case 1: 5627 // ... 5628 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5629 // ... 5630 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5631 // break; 5632 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5633 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5634 CGF.EmitBlock(Case1BB); 5635 5636 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5637 llvm::Value *EndArgs[] = { 5638 IdentTLoc, // ident_t *<loc> 5639 ThreadId, // i32 <gtid> 5640 Lock // kmp_critical_name *&<lock> 5641 }; 5642 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5643 CodeGenFunction &CGF, PrePostActionTy &Action) { 5644 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5645 auto IPriv = Privates.begin(); 5646 auto ILHS = LHSExprs.begin(); 5647 auto IRHS = RHSExprs.begin(); 5648 for (const Expr *E : ReductionOps) { 5649 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5650 cast<DeclRefExpr>(*IRHS)); 5651 ++IPriv; 5652 ++ILHS; 5653 ++IRHS; 5654 } 5655 }; 5656 RegionCodeGenTy RCG(CodeGen); 5657 CommonActionTy Action( 5658 nullptr, llvm::None, 5659 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5660 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5661 : OMPRTL___kmpc_end_reduce), 5662 EndArgs); 5663 RCG.setAction(Action); 5664 RCG(CGF); 5665 5666 CGF.EmitBranch(DefaultBB); 5667 5668 // 7. Build case 2: 5669 // ... 5670 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5671 // ... 5672 // break; 5673 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5674 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5675 CGF.EmitBlock(Case2BB); 5676 5677 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5678 CodeGenFunction &CGF, PrePostActionTy &Action) { 5679 auto ILHS = LHSExprs.begin(); 5680 auto IRHS = RHSExprs.begin(); 5681 auto IPriv = Privates.begin(); 5682 for (const Expr *E : ReductionOps) { 5683 const Expr *XExpr = nullptr; 5684 const Expr *EExpr = nullptr; 5685 const Expr *UpExpr = nullptr; 5686 BinaryOperatorKind BO = BO_Comma; 5687 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5688 if (BO->getOpcode() == BO_Assign) { 5689 XExpr = BO->getLHS(); 5690 UpExpr = BO->getRHS(); 5691 } 5692 } 5693 // Try to emit update expression as a simple atomic. 5694 const Expr *RHSExpr = UpExpr; 5695 if (RHSExpr) { 5696 // Analyze RHS part of the whole expression. 5697 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5698 RHSExpr->IgnoreParenImpCasts())) { 5699 // If this is a conditional operator, analyze its condition for 5700 // min/max reduction operator. 
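// (e.g. 'x = x < e ? x : e', the usual expansion of a min reduction).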
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
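  // (Illustrative note: that variable is stashed by emitTaskReductionFixups
  // under a name produced by generateUniqueName, roughly
  // "reduction_size.<var>_<loc>"; the exact separators depend on getName.)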
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available in those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [&M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result =
          CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                  M, OMPRTL___kmpc_cancel),
                              Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
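/// For instance (an illustrative sketch; 'my_alloc' and 'my_traits' are
/// hypothetical user declarations):
/// \code
/// omp_alloctrait_t my_traits[] = {{omp_atk_alignment, 64}};
/// #pragma omp target uses_allocators(my_alloc(my_traits))
/// { /* my_alloc is initialized on region entry, destroyed on exit */ }
/// \endcode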
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that, so the compiler
  // does not need to keep it alive and can therefore inline the host function
  // if that proves worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.
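  // (Illustrative sketch: on the host the ID becomes a one-byte global named
  // roughly "<entry>.region_id" whose address identifies the region; on the
  // device it is the outlined function itself. Exact separators come from
  // getName.)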

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
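///
/// For instance (illustrative):
/// \code
/// #pragma omp target teams num_teams(8)  // emits i32 8
/// #pragma omp target parallel            // emits i32 1
/// #pragma omp target                     // nothing nested: returns nullptr
/// \endcode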
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the 'if' clause. If it is present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause if the 'if' clause was not
      // specified or does not evaluate to false.
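      // (Illustrative: for '#pragma omp parallel if(c) num_threads(n)' nested
      // in the target region this computes c ? min(n, <default limit>) : 1,
      // where the min applies only when a default thread limit was given.)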
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
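///
/// For instance (illustrative):
/// \code
/// #pragma omp target teams thread_limit(64)   // emits i32 64
/// #pragma omp target parallel num_threads(4)  // emits i32 4
/// #pragma omp target simd                     // emits i32 1
/// \endcode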
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the 'if' clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
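    // (Illustrative: 'target parallel if(c) num_threads(n) thread_limit(t)'
    // yields c ? min(n, t) : 1; the min is emitted below as an icmp+select.)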
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information gathered from a map-like clause for a single set of
  /// mappable expression components.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
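    // (e.g. for the shaping expression '([n][m])p' with 'p' of type 'int *',
    // this emits n * m * sizeof(int); names are illustrative.)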
7149 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7150 llvm::Value *Size =
7151 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7152 for (const Expr *SE : OAE->getDimensions()) {
7153 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7154 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7155 CGF.getContext().getSizeType(),
7156 SE->getExprLoc());
7157 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7158 }
7159 return Size;
7160 }
7161
7162 // Reference types are ignored for mapping purposes.
7163 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7164 ExprTy = RefTy->getPointeeType().getCanonicalType();
7165
7166 // Given that an array section is considered a built-in type, we need to
7167 // do the calculation based on the length of the section instead of relying
7168 // on CGF.getTypeSize(E->getType()).
7169 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7170 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7171 OAE->getBase()->IgnoreParenImpCasts())
7172 .getCanonicalType();
7173
7174 // If there is no length associated with the expression and the lower
7175 // bound is not specified either, that means we are using the whole
7176 // length of the base.
7177 if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7178 !OAE->getLowerBound())
7179 return CGF.getTypeSize(BaseTy);
7180
7181 llvm::Value *ElemSize;
7182 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7183 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7184 } else {
7185 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7186 assert(ATy && "Expecting array type if not a pointer type.");
7187 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7188 }
7189
7190 // If we don't have a length at this point, that is because we have an
7191 // array section with a single element.
7192 if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7193 return ElemSize;
7194
7195 if (const Expr *LenExpr = OAE->getLength()) {
7196 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7197 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7198 CGF.getContext().getSizeType(),
7199 LenExpr->getExprLoc());
7200 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7201 }
7202 assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7203 OAE->getLowerBound() && "expected array_section[lb:].");
7204 // Size = sizeof(base) - lb * sizeof(elem);
7205 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7206 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7207 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7208 CGF.getContext().getSizeType(),
7209 OAE->getLowerBound()->getExprLoc());
7210 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7211 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7212 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7213 LengthVal = CGF.Builder.CreateSelect(
7214 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7215 return LengthVal;
7216 }
7217 return CGF.getTypeSize(ExprTy);
7218 }
7219
7220 /// Return the corresponding bits for a given map clause modifier. Add
7221 /// a flag marking the map as a pointer if requested. Add a flag marking the
7222 /// map as the first one of a series of maps that relate to the same map
7223 /// expression.
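/// For instance (a sketch): 'map(always, close, tofrom: x)' yields
/// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with
/// OMP_MAP_PTR_AND_OBJ and OMP_MAP_TARGET_PARAM added when the corresponding
/// arguments are true.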
7224 OpenMPOffloadMappingFlags getMapTypeBits(
7225 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7226 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7227 OpenMPOffloadMappingFlags Bits =
7228 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7229 switch (MapType) {
7230 case OMPC_MAP_alloc:
7231 case OMPC_MAP_release:
7232 // alloc and release are the default behavior in the runtime library, i.e.
7233 // if we don't pass any bits, alloc/release is what the runtime is going
7234 // to do. Therefore, we don't need to signal anything for these two
7235 // type modifiers.
7236 break;
7237 case OMPC_MAP_to:
7238 Bits |= OMP_MAP_TO;
7239 break;
7240 case OMPC_MAP_from:
7241 Bits |= OMP_MAP_FROM;
7242 break;
7243 case OMPC_MAP_tofrom:
7244 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7245 break;
7246 case OMPC_MAP_delete:
7247 Bits |= OMP_MAP_DELETE;
7248 break;
7249 case OMPC_MAP_unknown:
7250 llvm_unreachable("Unexpected map type!");
7251 }
7252 if (AddPtrFlag)
7253 Bits |= OMP_MAP_PTR_AND_OBJ;
7254 if (AddIsTargetParamFlag)
7255 Bits |= OMP_MAP_TARGET_PARAM;
7256 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7257 != MapModifiers.end())
7258 Bits |= OMP_MAP_ALWAYS;
7259 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7260 != MapModifiers.end())
7261 Bits |= OMP_MAP_CLOSE;
7262 return Bits;
7263 }
7264
7265 /// Return true if the provided expression is a final array section. A
7266 /// final array section is one whose length can't be proved to be one.
7267 bool isFinalArraySectionExpression(const Expr *E) const {
7268 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7269
7270 // It is not an array section and therefore not a unity-size one.
7271 if (!OASE)
7272 return false;
7273
7274 // An array section with no colon always refers to a single element.
7275 if (OASE->getColonLoc().isInvalid())
7276 return false;
7277
7278 const Expr *Length = OASE->getLength();
7279
7280 // If we don't have a length we have to check if the array has size 1
7281 // for this dimension. Also, we should always expect a length if the
7282 // base type is pointer.
7283 if (!Length) {
7284 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7285 OASE->getBase()->IgnoreParenImpCasts())
7286 .getCanonicalType();
7287 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7288 return ATy->getSize().getSExtValue() != 1;
7289 // If we don't have a constant dimension length, we have to consider
7290 // the current section as having any size, so it is not necessarily
7291 // unitary. If it happens to be unity size, that's the user's fault.
7292 return true;
7293 }
7294
7295 // Check if the length evaluates to 1.
7296 Expr::EvalResult Result;
7297 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7298 return true; // Can have more than size 1.
7299
7300 llvm::APSInt ConstLength = Result.Val.getInt();
7301 return ConstLength.getSExtValue() != 1;
7302 }
7303
7304 /// Generate the base pointers, section pointers, sizes and map type
7305 /// bits for the provided map type, map modifier, and expression components.
7306 /// \a IsFirstComponentList should be set to true if the provided set of
7307 /// components is the first associated with a capture.
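/// For example (cf. the map(i[1:23]) row in the table inside the function
/// body): given 'int i[100];' and 'map(tofrom: i[1:23])', this produces base
/// pointer &i, section pointer &i[1], size 23*sizeof(int), and flags
/// TARGET_PARAM | TO | FROM.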
7308 void generateInfoForComponentList( 7309 OpenMPMapClauseKind MapType, 7310 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7311 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7312 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7313 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7314 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7315 bool IsImplicit, 7316 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7317 OverlappedElements = llvm::None) const { 7318 // The following summarizes what has to be generated for each map and the 7319 // types below. The generated information is expressed in this order: 7320 // base pointer, section pointer, size, flags 7321 // (to add to the ones that come from the map type and modifier). 7322 // 7323 // double d; 7324 // int i[100]; 7325 // float *p; 7326 // 7327 // struct S1 { 7328 // int i; 7329 // float f[50]; 7330 // } 7331 // struct S2 { 7332 // int i; 7333 // float f[50]; 7334 // S1 s; 7335 // double *p; 7336 // struct S2 *ps; 7337 // } 7338 // S2 s; 7339 // S2 *ps; 7340 // 7341 // map(d) 7342 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7343 // 7344 // map(i) 7345 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7346 // 7347 // map(i[1:23]) 7348 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7349 // 7350 // map(p) 7351 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7352 // 7353 // map(p[1:24]) 7354 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7355 // 7356 // map(s) 7357 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7358 // 7359 // map(s.i) 7360 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7361 // 7362 // map(s.s.f) 7363 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7364 // 7365 // map(s.p) 7366 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7367 // 7368 // map(to: s.p[:22]) 7369 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7370 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7371 // &(s.p), &(s.p[0]), 22*sizeof(double), 7372 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7373 // (*) alloc space for struct members, only this is a target parameter 7374 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7375 // optimizes this entry out, same in the examples below) 7376 // (***) map the pointee (map: to) 7377 // 7378 // map(s.ps) 7379 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7380 // 7381 // map(from: s.ps->s.i) 7382 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7383 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7384 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7385 // 7386 // map(to: s.ps->ps) 7387 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7388 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7389 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7390 // 7391 // map(s.ps->ps->ps) 7392 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7393 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7394 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7395 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7396 // 7397 // map(to: s.ps->ps->s.f[:22]) 7398 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7399 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7400 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7401 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7402 // 7403 // map(ps) 7404 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7405 // 7406 // map(ps->i) 7407 // ps, &(ps->i), 
sizeof(int), TARGET_PARAM | TO | FROM
7408 //
7409 // map(ps->s.f)
7410 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7411 //
7412 // map(from: ps->p)
7413 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7414 //
7415 // map(to: ps->p[:22])
7416 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7417 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7418 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7419 //
7420 // map(ps->ps)
7421 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7422 //
7423 // map(from: ps->ps->s.i)
7424 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7425 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7426 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7427 //
7428 // map(from: ps->ps->ps)
7429 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7430 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7431 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7432 //
7433 // map(ps->ps->ps->ps)
7434 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7435 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7436 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7437 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7438 //
7439 // map(to: ps->ps->ps->s.f[:22])
7440 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7441 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7442 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7443 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7444 //
7445 // map(to: s.f[:22]) map(from: s.p[:33])
7446 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7447 // sizeof(double*) (*), TARGET_PARAM
7448 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7449 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7450 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7451 // (*) allocate contiguous space needed to fit all mapped members even if
7452 // we allocate space for members not mapped (in this example,
7453 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7454 // them as well because they fall between &s.f[0] and &s.p)
7455 //
7456 // map(from: s.f[:22]) map(to: ps->p[:33])
7457 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7458 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7459 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7460 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7461 // (*) the struct this entry pertains to is the 2nd element in the list of
7462 // arguments, hence MEMBER_OF(2)
7463 //
7464 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7465 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7466 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7467 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7468 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7469 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7470 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7471 // (*) the struct this entry pertains to is the 4th element in the list
7472 // of arguments, hence MEMBER_OF(4)
7473
7474 // Track if the map information being generated is the first for a capture.
7475 bool IsCaptureFirstInfo = IsFirstComponentList;
7476 // When the variable is a declare target link variable, or appears in a
7477 // 'to' clause with unified shared memory, a reference is needed to hold
7478 // the host/device address of the variable.
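// Sketch: given 'int g; #pragma omp declare target link(g)', mapping 'g'
// below goes through the reference returned by getAddrOfDeclareTargetVar
// rather than through &g directly.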
7479 bool RequiresReference = false;
7480
7481 // Scan the components from the base to the complete expression.
7482 auto CI = Components.rbegin();
7483 auto CE = Components.rend();
7484 auto I = CI;
7485
7486 // Track if the map information being generated is the first for a list of
7487 // components.
7488 bool IsExpressionFirstInfo = true;
7489 Address BP = Address::invalid();
7490 const Expr *AssocExpr = I->getAssociatedExpression();
7491 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7492 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7493 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7494
7495 if (isa<MemberExpr>(AssocExpr)) {
7496 // The base is the 'this' pointer. The content of the pointer is going
7497 // to be the base of the field being mapped.
7498 BP = CGF.LoadCXXThisAddress();
7499 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7500 (OASE &&
7501 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7502 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7503 } else if (OAShE &&
7504 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7505 BP = Address(
7506 CGF.EmitScalarExpr(OAShE->getBase()),
7507 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7508 } else {
7509 // The base is the reference to the variable.
7510 // BP = &Var.
7511 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7512 if (const auto *VD =
7513 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7514 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7515 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7516 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7517 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7518 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7519 RequiresReference = true;
7520 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7521 }
7522 }
7523 }
7524
7525 // If the variable is a pointer and is being dereferenced (i.e. is not
7526 // the last component), the base has to be the pointer itself, not its
7527 // reference. References are ignored for mapping purposes.
7528 QualType Ty =
7529 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7530 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7531 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7532
7533 // We do not need to generate individual map information for the
7534 // pointer; it can be associated with the combined storage.
7535 ++I;
7536 }
7537 }
7538
7539 // Track whether a component of the list should be marked as MEMBER_OF some
7540 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7541 // in a component list should be marked as MEMBER_OF; all subsequent entries
7542 // do not belong to the base struct. E.g.
7543 // struct S2 s;
7544 // s.ps->ps->ps->f[:]
7545 // (1) (2) (3) (4)
7546 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7547 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7548 // is the pointee of ps(2), which is not a member of struct s, so it should
7549 // not be marked as such (it is still PTR_AND_OBJ).
7550 // The variable is initialized to false so that PTR_AND_OBJ entries which
7551 // are not struct members are not considered (e.g. array of pointers to
7552 // data).
7553 bool ShouldBeMemberOf = false;
7554
7555 // Variable keeping track of whether or not we have encountered a component
7556 // in the component list which is a member expression. Useful when we have a
7557 // pointer or a final array section, in which case it is the previous
7558 // component in the list which tells us whether we have a member expression.
7559 // E.g. X.f[:]
7560 // While processing the final array section "[:]" it is "f" which tells us
7561 // whether we are dealing with a member of a declared struct.
7562 const MemberExpr *EncounteredME = nullptr;
7563
7564 for (; I != CE; ++I) {
7565 // If the current component is a member of a struct (parent struct), mark it.
7566 if (!EncounteredME) {
7567 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7568 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7569 // as MEMBER_OF the parent struct.
7570 if (EncounteredME)
7571 ShouldBeMemberOf = true;
7572 }
7573
7574 auto Next = std::next(I);
7575
7576 // We need to generate the addresses and sizes if this is the last
7577 // component, if the component is a pointer or if it is an array section
7578 // whose length can't be proved to be one. If this is a pointer, it
7579 // becomes the base address for the following components.
7580
7581 // A final array section is one whose length can't be proved to be one.
7582 bool IsFinalArraySection =
7583 isFinalArraySectionExpression(I->getAssociatedExpression());
7584
7585 // Get information on whether the element is a pointer. Have to apply
7586 // special treatment to array sections given that they are built-in
7587 // types.
7588 const auto *OASE =
7589 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7590 const auto *OAShE =
7591 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7592 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7593 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7594 bool IsPointer =
7595 OAShE ||
7596 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7597 .getCanonicalType()
7598 ->isAnyPointerType()) ||
7599 I->getAssociatedExpression()->getType()->isAnyPointerType();
7600 bool IsNonDerefPointer = IsPointer && !UO && !BO;
7601
7602 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7603 // If this is not the last component, we expect the pointer to be
7604 // associated with an array expression or member expression.
7605 assert((Next == CE ||
7606 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7607 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7608 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7609 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7610 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7611 "Unexpected expression");
7612
7613 Address LB = Address::invalid();
7614 if (OAShE) {
7615 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7616 CGF.getContext().getTypeAlignInChars(
7617 OAShE->getBase()->getType()));
7618 } else {
7619 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7620 .getAddress(CGF);
7621 }
7622
7623 // If this component is a pointer inside the base struct then we don't
7624 // need to create any entry for it - it will be combined with the object
7625 // it is pointing to into a single PTR_AND_OBJ entry.
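// E.g. for 'map(to: s.p[:22])' in the table above, the pointer 's.p' and
// its pointee are combined into the single PTR_AND_OBJ row
// '&(s.p), &(s.p[0]), 22*sizeof(double)'.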
7626 bool IsMemberPointer =
7627 IsPointer && EncounteredME &&
7628 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7629 EncounteredME);
7630 if (!OverlappedElements.empty()) {
7631 // Handle the base element using the info for the overlapped elements.
7632 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7633 assert(Next == CE &&
7634 "Expected last element for the overlapped elements.");
7635 assert(!IsPointer &&
7636 "Unexpected base element with the pointer type.");
7637 // Mark the whole struct as the struct that requires allocation on the
7638 // device.
7639 PartialStruct.LowestElem = {0, LB};
7640 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7641 I->getAssociatedExpression()->getType());
7642 Address HB = CGF.Builder.CreateConstGEP(
7643 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7644 CGF.VoidPtrTy),
7645 TypeSize.getQuantity() - 1);
7646 PartialStruct.HighestElem = {
7647 std::numeric_limits<decltype(
7648 PartialStruct.HighestElem.first)>::max(),
7649 HB};
7650 PartialStruct.Base = BP;
7651 // Emit entries for the non-overlapped data.
7652 OpenMPOffloadMappingFlags Flags =
7653 OMP_MAP_MEMBER_OF |
7654 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7655 /*AddPtrFlag=*/false,
7656 /*AddIsTargetParamFlag=*/false);
7657 LB = BP;
7658 llvm::Value *Size = nullptr;
7659 // Emit entries covering each run of non-overlapped structure elements.
7660 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7661 Component : OverlappedElements) {
7662 Address ComponentLB = Address::invalid();
7663 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7664 Component) {
7665 if (MC.getAssociatedDeclaration()) {
7666 ComponentLB =
7667 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7668 .getAddress(CGF);
7669 Size = CGF.Builder.CreatePtrDiff(
7670 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7671 CGF.EmitCastToVoidPtr(LB.getPointer()));
7672 break;
7673 }
7674 }
7675 BasePointers.push_back(BP.getPointer());
7676 Pointers.push_back(LB.getPointer());
7677 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7678 /*isSigned=*/true));
7679 Types.push_back(Flags);
7680 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7681 }
7682 BasePointers.push_back(BP.getPointer());
7683 Pointers.push_back(LB.getPointer());
7684 Size = CGF.Builder.CreatePtrDiff(
7685 CGF.EmitCastToVoidPtr(
7686 CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7687 CGF.EmitCastToVoidPtr(LB.getPointer()));
7688 Sizes.push_back(
7689 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7690 Types.push_back(Flags);
7691 break;
7692 }
7693 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7694 if (!IsMemberPointer) {
7695 BasePointers.push_back(BP.getPointer());
7696 Pointers.push_back(LB.getPointer());
7697 Sizes.push_back(
7698 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7699
7700 // We need to add a pointer flag for each map that comes from the
7701 // same expression except for the first one. We also need to signal
7702 // this map is the first one that relates to the current capture
7703 // (there is a set of entries for each capture).
7704 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7705 MapType, MapModifiers, IsImplicit,
7706 !IsExpressionFirstInfo || RequiresReference,
7707 IsCaptureFirstInfo && !RequiresReference);
7708
7709 if (!IsExpressionFirstInfo) {
7710 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7711 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
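// E.g. for 'map(s.ps->ps->ps)' in the table above: the
// &(s.ps) -> &(s.ps->ps) entry is PTR_AND_OBJ whose OBJ is itself a
// pointer, so its TO/FROM bits are cleared; only the final entry keeps
// TO | FROM.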
7712 if (IsPointer)
7713 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7714 OMP_MAP_DELETE | OMP_MAP_CLOSE);
7715
7716 if (ShouldBeMemberOf) {
7717 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7718 // should be later updated with the correct value of MEMBER_OF.
7719 Flags |= OMP_MAP_MEMBER_OF;
7720 // From now on, all subsequent PTR_AND_OBJ entries should not be
7721 // marked as MEMBER_OF.
7722 ShouldBeMemberOf = false;
7723 }
7724 }
7725
7726 Types.push_back(Flags);
7727 }
7728
7729 // If we have encountered a member expression so far, keep track of the
7730 // mapped member. If the parent is "*this", then the value declaration
7731 // is nullptr.
7732 if (EncounteredME) {
7733 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7734 unsigned FieldIndex = FD->getFieldIndex();
7735
7736 // Update info about the lowest and highest elements for this struct
7737 if (!PartialStruct.Base.isValid()) {
7738 PartialStruct.LowestElem = {FieldIndex, LB};
7739 PartialStruct.HighestElem = {FieldIndex, LB};
7740 PartialStruct.Base = BP;
7741 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7742 PartialStruct.LowestElem = {FieldIndex, LB};
7743 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7744 PartialStruct.HighestElem = {FieldIndex, LB};
7745 }
7746 }
7747
7748 // If we have a final array section, we are done with this expression.
7749 if (IsFinalArraySection)
7750 break;
7751
7752 // The pointer becomes the base for the next element.
7753 if (Next != CE)
7754 BP = LB;
7755
7756 IsExpressionFirstInfo = false;
7757 IsCaptureFirstInfo = false;
7758 }
7759 }
7760 }
7761
7762 /// Return the adjusted map modifiers if the declaration a capture refers to
7763 /// appears in a firstprivate clause. This is expected to be used only with
7764 /// directives that start with 'target'.
7765 MappableExprsHandler::OpenMPOffloadMappingFlags
7766 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7767 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7768
7769 // A firstprivate variable captured by reference will use only the
7770 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7771 // declaration is known as firstprivate in this handler.
7772 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7773 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7774 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7775 return MappableExprsHandler::OMP_MAP_ALWAYS |
7776 MappableExprsHandler::OMP_MAP_TO;
7777 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7778 return MappableExprsHandler::OMP_MAP_TO |
7779 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7780 return MappableExprsHandler::OMP_MAP_PRIVATE |
7781 MappableExprsHandler::OMP_MAP_TO;
7782 }
7783 return MappableExprsHandler::OMP_MAP_TO |
7784 MappableExprsHandler::OMP_MAP_FROM;
7785 }
7786
7787 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7788 // Shift left by getFlagMemberOffset() bits.
7789 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7790 << getFlagMemberOffset());
7791 }
7792
7793 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7794 OpenMPOffloadMappingFlags MemberOfFlag) {
7795 // If the entry is PTR_AND_OBJ but has not been marked with the special
7796 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7797 // marked as MEMBER_OF.
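// Note: OMP_MAP_MEMBER_OF occupies the 16 MSBs, so the test below checks
// that not all of those bits are set, i.e. that the 0xFFFF placeholder is
// absent. As a concrete sketch, getFlagMemberOffset() is 48 for the current
// encoding, and getMemberOfFlag(0) encodes MEMBER_OF(1) as 1ULL << 48.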
7798 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7799 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7800 return; 7801 7802 // Reset the placeholder value to prepare the flag for the assignment of the 7803 // proper MEMBER_OF value. 7804 Flags &= ~OMP_MAP_MEMBER_OF; 7805 Flags |= MemberOfFlag; 7806 } 7807 7808 void getPlainLayout(const CXXRecordDecl *RD, 7809 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7810 bool AsBase) const { 7811 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7812 7813 llvm::StructType *St = 7814 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7815 7816 unsigned NumElements = St->getNumElements(); 7817 llvm::SmallVector< 7818 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7819 RecordLayout(NumElements); 7820 7821 // Fill bases. 7822 for (const auto &I : RD->bases()) { 7823 if (I.isVirtual()) 7824 continue; 7825 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7826 // Ignore empty bases. 7827 if (Base->isEmpty() || CGF.getContext() 7828 .getASTRecordLayout(Base) 7829 .getNonVirtualSize() 7830 .isZero()) 7831 continue; 7832 7833 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7834 RecordLayout[FieldIndex] = Base; 7835 } 7836 // Fill in virtual bases. 7837 for (const auto &I : RD->vbases()) { 7838 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7839 // Ignore empty bases. 7840 if (Base->isEmpty()) 7841 continue; 7842 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7843 if (RecordLayout[FieldIndex]) 7844 continue; 7845 RecordLayout[FieldIndex] = Base; 7846 } 7847 // Fill in all the fields. 7848 assert(!RD->isUnion() && "Unexpected union."); 7849 for (const auto *Field : RD->fields()) { 7850 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7851 // will fill in later.) 7852 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7853 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7854 RecordLayout[FieldIndex] = Field; 7855 } 7856 } 7857 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7858 &Data : RecordLayout) { 7859 if (Data.isNull()) 7860 continue; 7861 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7862 getPlainLayout(Base, Layout, /*AsBase=*/true); 7863 else 7864 Layout.push_back(Data.get<const FieldDecl *>()); 7865 } 7866 } 7867 7868 public: 7869 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7870 : CurDir(&Dir), CGF(CGF) { 7871 // Extract firstprivate clause information. 7872 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7873 for (const auto *D : C->varlists()) 7874 FirstPrivateDecls.try_emplace( 7875 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7876 // Extract implicit firstprivates from uses_allocators clauses. 7877 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7878 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7879 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7880 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 7881 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 7882 /*Implicit=*/true); 7883 else if (const auto *VD = dyn_cast<VarDecl>( 7884 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 7885 ->getDecl())) 7886 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 7887 } 7888 } 7889 // Extract device pointer clause information. 
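// A sketch of what lands here: for '#pragma omp target is_device_ptr(q)',
// the component lists for 'q' are recorded in DevPointersMap and later cause
// 'q' to be passed by value (OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM) in
// generateInfoForCapture.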
7890 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7891 for (auto L : C->component_lists())
7892 DevPointersMap[L.first].push_back(L.second);
7893 }
7894
7895 /// Constructor for the declare mapper directive.
7896 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7897 : CurDir(&Dir), CGF(CGF) {}
7898
7899 /// Generate code for the combined entry if we have a partially mapped struct
7900 /// and take care of the mapping flags of the arguments corresponding to
7901 /// individual struct members.
7902 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7903 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7904 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7905 const StructRangeInfoTy &PartialStruct) const {
7906 // Base is the base of the struct
7907 BasePointers.push_back(PartialStruct.Base.getPointer());
7908 // Pointer is the address of the lowest element
7909 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7910 Pointers.push_back(LB);
7911 // Size is (addr of {highest+1} element) - (addr of lowest element)
7912 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7913 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7914 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7915 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7916 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7917 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7918 /*isSigned=*/false);
7919 Sizes.push_back(Size);
7920 // Map type is always TARGET_PARAM
7921 Types.push_back(OMP_MAP_TARGET_PARAM);
7922 // Remove TARGET_PARAM flag from the first element
7923 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7924
7925 // All other current entries will be MEMBER_OF the combined entry
7926 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7927 // 0xFFFF in the MEMBER_OF field).
7928 OpenMPOffloadMappingFlags MemberOfFlag =
7929 getMemberOfFlag(BasePointers.size() - 1);
7930 for (auto &M : CurTypes)
7931 setCorrectMemberOfFlag(M, MemberOfFlag);
7932 }
7933
7934 /// Generate all the base pointers, section pointers, sizes and map
7935 /// types for the extracted mappable expressions. Also, for each item that
7936 /// relates to a device pointer, a pair of the relevant declaration and the
7937 /// index where it occurs is appended to the device pointers info array.
7938 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7939 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7940 MapFlagsArrayTy &Types) const {
7941 // We have to process the component lists that relate to the same
7942 // declaration in a single chunk so that we can generate the map flags
7943 // correctly. Therefore, we organize all lists in a map.
7944 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7945
7946 // Helper function to fill the information map for the different supported
7947 // clauses.
7948 auto &&InfoGen = [&Info](
7949 const ValueDecl *D,
7950 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7951 OpenMPMapClauseKind MapType,
7952 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7953 bool ReturnDevicePointer, bool IsImplicit) {
7954 const ValueDecl *VD =
7955 D ?
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7956 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7957 IsImplicit);
7958 };
7959
7960 assert(CurDir.is<const OMPExecutableDirective *>() &&
7961 "Expect an executable directive");
7962 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7963 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7964 for (const auto L : C->component_lists()) {
7965 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7966 /*ReturnDevicePointer=*/false, C->isImplicit());
7967 }
7968 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7969 for (const auto L : C->component_lists()) {
7970 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7971 /*ReturnDevicePointer=*/false, C->isImplicit());
7972 }
7973 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7974 for (const auto L : C->component_lists()) {
7975 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7976 /*ReturnDevicePointer=*/false, C->isImplicit());
7977 }
7978
7979 // Look at the use_device_ptr clause information and mark the existing map
7980 // entries as such. If there is no map information for an entry in the
7981 // use_device_ptr list, we create one with map type 'alloc' and zero size
7982 // section. It is the user's fault if that was not mapped before. If there is
7983 // no map information and the pointer is a struct member, then we defer the
7984 // emission of that entry until the whole struct has been processed.
7985 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7986 DeferredInfo;
7987
7988 for (const auto *C :
7989 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7990 for (const auto L : C->component_lists()) {
7991 assert(!L.second.empty() && "Not expecting empty list of components!");
7992 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7993 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7994 const Expr *IE = L.second.back().getAssociatedExpression();
7995 // If the first component is a member expression, we have to look into
7996 // 'this', which maps to null in the map of map information. Otherwise
7997 // look directly for the information.
7998 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7999
8000 // We potentially have map information for this declaration already.
8001 // Look for the first set of components that refer to it.
8002 if (It != Info.end()) {
8003 auto CI = std::find_if(
8004 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8005 return MI.Components.back().getAssociatedDeclaration() == VD;
8006 });
8007 // If we found a map entry, signal that the pointer has to be returned
8008 // and move on to the next declaration.
8009 if (CI != It->second.end()) {
8010 CI->ReturnDevicePointer = true;
8011 continue;
8012 }
8013 }
8014
8015 // We didn't find any match in our map information: generate a zero-size
8016 // array section. If the pointer is a struct member, we defer this
8017 // action until the whole struct has been processed.
8018 if (isa<MemberExpr>(IE)) {
8019 // Insert the pointer into Info to be processed by
8020 // generateInfoForComponentList. Because it is a member pointer
8021 // without a pointee, no entry will be generated for it, therefore
8022 // we need to generate one after the whole struct has been processed.
8023 // Nonetheless, generateInfoForComponentList must be called to take
8024 // the pointer into account for the calculation of the range of the
8025 // partial struct.
8026 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8027 /*ReturnDevicePointer=*/false, C->isImplicit());
8028 DeferredInfo[nullptr].emplace_back(IE, VD);
8029 } else {
8030 llvm::Value *Ptr =
8031 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8032 BasePointers.emplace_back(Ptr, VD);
8033 Pointers.push_back(Ptr);
8034 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8035 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8036 }
8037 }
8038 }
8039
8040 for (const auto &M : Info) {
8041 // We need to know when we generate information for the first component
8042 // associated with a capture, because the mapping flags depend on it.
8043 bool IsFirstComponentList = true;
8044
8045 // Temporary versions of arrays
8046 MapBaseValuesArrayTy CurBasePointers;
8047 MapValuesArrayTy CurPointers;
8048 MapValuesArrayTy CurSizes;
8049 MapFlagsArrayTy CurTypes;
8050 StructRangeInfoTy PartialStruct;
8051
8052 for (const MapInfo &L : M.second) {
8053 assert(!L.Components.empty() &&
8054 "Not expecting declaration with no component lists.");
8055
8056 // Remember the current base pointer index.
8057 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8058 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8059 CurBasePointers, CurPointers, CurSizes,
8060 CurTypes, PartialStruct,
8061 IsFirstComponentList, L.IsImplicit);
8062
8063 // If this entry relates to a device pointer, set the relevant
8064 // declaration and add the 'return pointer' flag.
8065 if (L.ReturnDevicePointer) {
8066 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8067 "Unexpected number of mapped base pointers.");
8068
8069 const ValueDecl *RelevantVD =
8070 L.Components.back().getAssociatedDeclaration();
8071 assert(RelevantVD &&
8072 "No relevant declaration related with device pointer??");
8073
8074 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8075 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8076 }
8077 IsFirstComponentList = false;
8078 }
8079
8080 // Append any pending zero-length pointers which are struct members and
8081 // are used with use_device_ptr.
8082 auto CI = DeferredInfo.find(M.first);
8083 if (CI != DeferredInfo.end()) {
8084 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8085 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8086 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8087 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8088 CurBasePointers.emplace_back(BasePtr, L.VD);
8089 CurPointers.push_back(Ptr);
8090 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8091 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8092 // value MEMBER_OF=FFFF so that the entry is later updated with the
8093 // correct value of MEMBER_OF.
8094 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8095 OMP_MAP_MEMBER_OF);
8096 }
8097 }
8098
8099 // If there is an entry in PartialStruct it means we have a struct with
8100 // individual members mapped. Emit an extra combined entry.
8101 if (PartialStruct.Base.isValid())
8102 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8103 PartialStruct);
8104
8105 // We need to append the results of this capture to what we already have.
8106 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8107 Pointers.append(CurPointers.begin(), CurPointers.end());
8108 Sizes.append(CurSizes.begin(), CurSizes.end());
8109 Types.append(CurTypes.begin(), CurTypes.end());
8110 }
8111 }
8112
8113 /// Generate all the base pointers, section pointers, sizes and map types for
8114 /// the extracted map clauses of a user-defined mapper.
8115 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8116 MapValuesArrayTy &Pointers,
8117 MapValuesArrayTy &Sizes,
8118 MapFlagsArrayTy &Types) const {
8119 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8120 "Expect a declare mapper directive");
8121 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8122 // We have to process the component lists that relate to the same
8123 // declaration in a single chunk so that we can generate the map flags
8124 // correctly. Therefore, we organize all lists in a map.
8125 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8126
8127 // Helper function to fill the information map for the different supported
8128 // clauses.
8129 auto &&InfoGen = [&Info](
8130 const ValueDecl *D,
8131 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8132 OpenMPMapClauseKind MapType,
8133 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8134 bool ReturnDevicePointer, bool IsImplicit) {
8135 const ValueDecl *VD =
8136 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8137 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8138 IsImplicit);
8139 };
8140
8141 for (const auto *C : CurMapperDir->clauselists()) {
8142 const auto *MC = cast<OMPMapClause>(C);
8143 for (const auto L : MC->component_lists()) {
8144 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8145 /*ReturnDevicePointer=*/false, MC->isImplicit());
8146 }
8147 }
8148
8149 for (const auto &M : Info) {
8150 // We need to know when we generate information for the first component
8151 // associated with a capture, because the mapping flags depend on it.
8152 bool IsFirstComponentList = true;
8153
8154 // Temporary versions of arrays
8155 MapBaseValuesArrayTy CurBasePointers;
8156 MapValuesArrayTy CurPointers;
8157 MapValuesArrayTy CurSizes;
8158 MapFlagsArrayTy CurTypes;
8159 StructRangeInfoTy PartialStruct;
8160
8161 for (const MapInfo &L : M.second) {
8162 assert(!L.Components.empty() &&
8163 "Not expecting declaration with no component lists.");
8164 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8165 CurBasePointers, CurPointers, CurSizes,
8166 CurTypes, PartialStruct,
8167 IsFirstComponentList, L.IsImplicit);
8168 IsFirstComponentList = false;
8169 }
8170
8171 // If there is an entry in PartialStruct it means we have a struct with
8172 // individual members mapped. Emit an extra combined entry.
8173 if (PartialStruct.Base.isValid())
8174 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8175 PartialStruct);
8176
8177 // We need to append the results of this capture to what we already have.
8178 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8179 Pointers.append(CurPointers.begin(), CurPointers.end());
8180 Sizes.append(CurSizes.begin(), CurSizes.end());
8181 Types.append(CurTypes.begin(), CurTypes.end());
8182 }
8183 }
8184
8185 /// Emit capture info for lambdas' variables captured by reference.
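/// A sketch of the effect: for 'int x; auto l = [&x]() { return x; };'
/// mapped into a target region, the field of 'l' that holds the reference to
/// 'x' gets an entry with OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
/// OMP_MAP_MEMBER_OF (placeholder) | OMP_MAP_IMPLICIT, as pushed below.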
8186 void generateInfoForLambdaCaptures(
8187 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8188 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8189 MapFlagsArrayTy &Types,
8190 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8191 const auto *RD = VD->getType()
8192 .getCanonicalType()
8193 .getNonReferenceType()
8194 ->getAsCXXRecordDecl();
8195 if (!RD || !RD->isLambda())
8196 return;
8197 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8198 LValue VDLVal = CGF.MakeAddrLValue(
8199 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8200 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8201 FieldDecl *ThisCapture = nullptr;
8202 RD->getCaptureFields(Captures, ThisCapture);
8203 if (ThisCapture) {
8204 LValue ThisLVal =
8205 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8206 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8207 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8208 VDLVal.getPointer(CGF));
8209 BasePointers.push_back(ThisLVal.getPointer(CGF));
8210 Pointers.push_back(ThisLValVal.getPointer(CGF));
8211 Sizes.push_back(
8212 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8213 CGF.Int64Ty, /*isSigned=*/true));
8214 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8215 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8216 }
8217 for (const LambdaCapture &LC : RD->captures()) {
8218 if (!LC.capturesVariable())
8219 continue;
8220 const VarDecl *VD = LC.getCapturedVar();
8221 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8222 continue;
8223 auto It = Captures.find(VD);
8224 assert(It != Captures.end() && "Found lambda capture without field.");
8225 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8226 if (LC.getCaptureKind() == LCK_ByRef) {
8227 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8228 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8229 VDLVal.getPointer(CGF));
8230 BasePointers.push_back(VarLVal.getPointer(CGF));
8231 Pointers.push_back(VarLValVal.getPointer(CGF));
8232 Sizes.push_back(CGF.Builder.CreateIntCast(
8233 CGF.getTypeSize(
8234 VD->getType().getCanonicalType().getNonReferenceType()),
8235 CGF.Int64Ty, /*isSigned=*/true));
8236 } else {
8237 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8238 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8239 VDLVal.getPointer(CGF));
8240 BasePointers.push_back(VarLVal.getPointer(CGF));
8241 Pointers.push_back(VarRVal.getScalarVal());
8242 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8243 }
8244 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8245 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8246 }
8247 }
8248
8249 /// Set correct MEMBER_OF indices for lambda captures.
8250 void adjustMemberOfForLambdaCaptures(
8251 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8252 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8253 MapFlagsArrayTy &Types) const {
8254 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8255 // Set correct member_of idx for all implicit lambda captures.
8256 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8257 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8258 continue;
8259 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8260 assert(BasePtr && "Unable to find base lambda address.");
8261 int TgtIdx = -1;
8262 for (unsigned J = I; J > 0; --J) {
8263 unsigned Idx = J - 1;
8264 if (Pointers[Idx] != BasePtr)
8265 continue;
8266 TgtIdx = Idx;
8267 break;
8268 }
8269 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8270 // All other current entries will be MEMBER_OF the combined entry
8271 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8272 // 0xFFFF in the MEMBER_OF field).
8273 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8274 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8275 }
8276 }
8277
8278 /// Generate the base pointers, section pointers, sizes and map types
8279 /// associated with a given capture.
8280 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8281 llvm::Value *Arg,
8282 MapBaseValuesArrayTy &BasePointers,
8283 MapValuesArrayTy &Pointers,
8284 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8285 StructRangeInfoTy &PartialStruct) const {
8286 assert(!Cap->capturesVariableArrayType() &&
8287 "Not expecting to generate map info for a variable array type!");
8288
8289 // We need to know when we are generating information for the first
8290 // component associated with a capture.
8290 const ValueDecl *VD = Cap->capturesThis()
8291 ? nullptr
8292 : Cap->getCapturedVar()->getCanonicalDecl();
8293
8294 // If this declaration appears in an is_device_ptr clause we just have to
8295 // pass the pointer by value. If it is a reference to a declaration, we just
8296 // pass its value.
8297 if (DevPointersMap.count(VD)) {
8298 BasePointers.emplace_back(Arg, VD);
8299 Pointers.push_back(Arg);
8300 Sizes.push_back(
8301 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8302 CGF.Int64Ty, /*isSigned=*/true));
8303 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8304 return;
8305 }
8306
8307 using MapData =
8308 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8309 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8310 SmallVector<MapData, 4> DeclComponentLists;
8311 assert(CurDir.is<const OMPExecutableDirective *>() &&
8312 "Expect an executable directive");
8313 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8314 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8315 for (const auto L : C->decl_component_lists(VD)) {
8316 assert(L.first == VD &&
8317 "We got information for the wrong declaration??");
8318 assert(!L.second.empty() &&
8319 "Not expecting declaration with no component lists.");
8320 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8321 C->getMapTypeModifiers(),
8322 C->isImplicit());
8323 }
8324 }
8325
8326 // Find overlapping elements (including the offset from the base element).
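// E.g. (a sketch): with 'map(s)' and 'map(s.f[:22])' on the same directive,
// the component list of 's' is a prefix of that of 's.f[:22]', so the two
// overlap; the shorter list becomes the base and 's.f[:22]' is recorded as
// one of its overlapped sublists.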
8327 llvm::SmallDenseMap<
8328 const MapData *,
8329 llvm::SmallVector<
8330 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8331 4>
8332 OverlappedData;
8333 size_t Count = 0;
8334 for (const MapData &L : DeclComponentLists) {
8335 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8336 OpenMPMapClauseKind MapType;
8337 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8338 bool IsImplicit;
8339 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8340 ++Count;
8341 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8342 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8343 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8344 auto CI = Components.rbegin();
8345 auto CE = Components.rend();
8346 auto SI = Components1.rbegin();
8347 auto SE = Components1.rend();
8348 for (; CI != CE && SI != SE; ++CI, ++SI) {
8349 if (CI->getAssociatedExpression()->getStmtClass() !=
8350 SI->getAssociatedExpression()->getStmtClass())
8351 break;
8352 // Are we dealing with different variables/fields?
8353 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8354 break;
8355 }
8356 // An overlap was found if we reached the end of at least one of the two
8357 // component lists while all compared components matched.
8358 if (CI == CE || SI == SE) {
8359 assert((CI != CE || SI != SE) &&
8360 "Unexpected full match of the mapping components.");
8361 const MapData &BaseData = CI == CE ? L : L1;
8362 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8363 SI == SE ? Components : Components1;
8364 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8365 OverlappedElements.getSecond().push_back(SubData);
8366 }
8367 }
8368 }
8369 // Sort the overlapped elements for each item.
8370 llvm::SmallVector<const FieldDecl *, 4> Layout;
8371 if (!OverlappedData.empty()) {
8372 if (const auto *CRD =
8373 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8374 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8375 else {
8376 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8377 Layout.append(RD->field_begin(), RD->field_end());
8378 }
8379 }
8380 for (auto &Pair : OverlappedData) {
8381 llvm::sort(
8382 Pair.getSecond(),
8383 [&Layout](
8384 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8385 OMPClauseMappableExprCommon::MappableExprComponentListRef
8386 Second) {
8387 auto CI = First.rbegin();
8388 auto CE = First.rend();
8389 auto SI = Second.rbegin();
8390 auto SE = Second.rend();
8391 for (; CI != CE && SI != SE; ++CI, ++SI) {
8392 if (CI->getAssociatedExpression()->getStmtClass() !=
8393 SI->getAssociatedExpression()->getStmtClass())
8394 break;
8395 // Are we dealing with different variables/fields?
8396 if (CI->getAssociatedDeclaration() !=
8397 SI->getAssociatedDeclaration())
8398 break;
8399 }
8400
8401 // Lists contain the same elements.
8402 if (CI == CE && SI == SE)
8403 return false;
8404
8405 // A list with fewer elements is less than a list with more elements.
8406 if (CI == CE || SI == SE)
8407 return CI == CE;
8408
8409 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8410 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8411 if (FD1->getParent() == FD2->getParent())
8412 return FD1->getFieldIndex() < FD2->getFieldIndex();
8413 const auto It =
8414 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8415 return FD == FD1 || FD == FD2;
8416 });
8417 return *It == FD1;
8418 });
8419 }
8420
8421 // The mapping flags depend on whether this is the first component list
8422 // for a capture, so go through the elements with overlapped elements first.
8423 for (const auto &Pair : OverlappedData) {
8424 const MapData &L = *Pair.getFirst();
8425 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8426 OpenMPMapClauseKind MapType;
8427 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8428 bool IsImplicit;
8429 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8430 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8431 OverlappedComponents = Pair.getSecond();
8432 bool IsFirstComponentList = true;
8433 generateInfoForComponentList(MapType, MapModifiers, Components,
8434 BasePointers, Pointers, Sizes, Types,
8435 PartialStruct, IsFirstComponentList,
8436 IsImplicit, OverlappedComponents);
8437 }
8438 // Go through other elements without overlapped elements.
8439 bool IsFirstComponentList = OverlappedData.empty();
8440 for (const MapData &L : DeclComponentLists) {
8441 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8442 OpenMPMapClauseKind MapType;
8443 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8444 bool IsImplicit;
8445 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8446 auto It = OverlappedData.find(&L);
8447 if (It == OverlappedData.end())
8448 generateInfoForComponentList(MapType, MapModifiers, Components,
8449 BasePointers, Pointers, Sizes, Types,
8450 PartialStruct, IsFirstComponentList,
8451 IsImplicit);
8452 IsFirstComponentList = false;
8453 }
8454 }
8455
8456 /// Generate the base pointers, section pointers, sizes and map types
8457 /// associated with the declare target link variables.
8458 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8459 MapValuesArrayTy &Pointers,
8460 MapValuesArrayTy &Sizes,
8461 MapFlagsArrayTy &Types) const {
8462 assert(CurDir.is<const OMPExecutableDirective *>() &&
8463 "Expect an executable directive");
8464 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8465 // Map other list items in the map clause which are not captured variables
8466 // but "declare target link" global variables.
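// Sketch: 'int g; #pragma omp declare target link(g)' combined with
// 'map(tofrom: g)' on a target construct reaches this point, since 'g' is
// not a captured variable of the region.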
8467 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8468 for (const auto L : C->component_lists()) {
8469 if (!L.first)
8470 continue;
8471 const auto *VD = dyn_cast<VarDecl>(L.first);
8472 if (!VD)
8473 continue;
8474 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8475 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8476 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8477 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8478 continue;
8479 StructRangeInfoTy PartialStruct;
8480 generateInfoForComponentList(
8481 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8482 Pointers, Sizes, Types, PartialStruct,
8483 /*IsFirstComponentList=*/true, C->isImplicit());
8484 assert(!PartialStruct.Base.isValid() &&
8485 "No partial structs for declare target link expected.");
8486 }
8487 }
8488 }
8489
8490 /// Generate the default map information for a given capture \a CI,
8491 /// record field declaration \a RI and captured value \a CV.
8492 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8493 const FieldDecl &RI, llvm::Value *CV,
8494 MapBaseValuesArrayTy &CurBasePointers,
8495 MapValuesArrayTy &CurPointers,
8496 MapValuesArrayTy &CurSizes,
8497 MapFlagsArrayTy &CurMapTypes) const {
8498 bool IsImplicit = true;
8499 // Do the default mapping.
8500 if (CI.capturesThis()) {
8501 CurBasePointers.push_back(CV);
8502 CurPointers.push_back(CV);
8503 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8504 CurSizes.push_back(
8505 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8506 CGF.Int64Ty, /*isSigned=*/true));
8507 // Default map type.
8508 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8509 } else if (CI.capturesVariableByCopy()) {
8510 CurBasePointers.push_back(CV);
8511 CurPointers.push_back(CV);
8512 if (!RI.getType()->isAnyPointerType()) {
8513 // We have to signal to the runtime the captures passed by value that
8514 // are not pointers.
8515 CurMapTypes.push_back(OMP_MAP_LITERAL);
8516 CurSizes.push_back(CGF.Builder.CreateIntCast(
8517 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8518 } else {
8519 // Pointers are implicitly mapped with a zero size and no flags (other
8520 // than the TARGET_PARAM and IMPLICIT flags added below for default maps).
8521 CurMapTypes.push_back(OMP_MAP_NONE);
8522 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8523 }
8524 const VarDecl *VD = CI.getCapturedVar();
8525 auto I = FirstPrivateDecls.find(VD);
8526 if (I != FirstPrivateDecls.end())
8527 IsImplicit = I->getSecond();
8528 } else {
8529 assert(CI.capturesVariable() && "Expected captured reference.");
8530 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8531 QualType ElementType = PtrTy->getPointeeType();
8532 CurSizes.push_back(CGF.Builder.CreateIntCast(
8533 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8534 // The default map type for a scalar/complex type is 'to' because by
8535 // default the value doesn't have to be retrieved. For an aggregate
8536 // type, the default is 'tofrom'.
8537 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8538 const VarDecl *VD = CI.getCapturedVar();
8539 auto I = FirstPrivateDecls.find(VD);
8540 if (I != FirstPrivateDecls.end() &&
8541 VD->getType().isConstant(CGF.getContext())) {
8542 llvm::Constant *Addr =
8543 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8544 // Copy the value of the original variable to the new global copy.
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty,
                                      /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

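// Illustrative sketch (hedged; the names follow the temporaries created
// above, but the IR below is an assumption, not emitted verbatim): for a
// region with two constant-size maps, the arrays resemble
//
//   %.offload_baseptrs = alloca [2 x i8*]
//   %.offload_ptrs     = alloca [2 x i8*]
//   @.offload_sizes    = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
//   @.offload_maptypes = private unnamed_addr constant [2 x i64] [...]
//
// with the base pointers and pointers stored element by element at run time.
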
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

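// Illustrative example (hedged; assumed user code): for
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I) ...
//
// the lookup above drills through the 'teams' region and returns the nested
// 'distribute parallel for' directive, so its loop bounds can drive the
// trip-count computation in emitTargetNumIterationsCall().
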
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
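    // For instance (hedged illustration; assumed user code): with
    //
    //   #pragma omp declare mapper(struct S s) map(tofrom: s.X)
    //   #pragma omp target map(to: s)
    //
    // the member entry for s.X decays from 'tofrom' to 'to', and under
    // map(alloc: s) it would decay to 'alloc', following the table above.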
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

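// Illustrative behavior note (hedged): when a user-defined mapper is invoked
// for an array section, e.g. 'map(mapper(id),tofrom: S[0:N])' with N > 1, the
// init path above first registers one allocation component covering
// N * sizeof(ElementTy) bytes with the to/from bits cleared; the deletion
// path performs the symmetric work when \p MapType carries OMP_MAP_DELETE.
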
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

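// Illustrative runtime interaction (hedged sketch): for a construct like
// '#pragma omp target teams distribute parallel for', the call emitted above
// resembles
//
//   __kmpc_push_target_tripcount(device_id, /*loop trip count*/ N);
//
// so the runtime can size the launch before the subsequent __tgt_target_teams
// call.
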
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
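    // For instance (hedged sketch; argument names are placeholders): a
    // '#pragma omp target teams num_teams(8)' region lowers to something like
    //
    //   __tgt_target_teams(device_id, host_region_id, arg_num, base_ptrs,
    //                      ptrs, sizes, map_types, /*NumTeams=*/8,
    //                      /*ThreadLimit=*/0);
    //
    // whereas a bare '#pragma omp target' uses __tgt_target() with no team
    // arguments.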
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that should
      // be passed to the runtime library - a 32-bit integer with the value
      // zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
                                         : OMPRTL___tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(),
              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers,
              CurSizes, CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

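// Illustrative dispatch summary (hedged; assumed user code): for
//
//   #pragma omp target if(UseGPU) device(Dev)
//
// the generated code evaluates 'UseGPU' and either runs the offloading path
// (ThenGen, which may still fall back to the host on a non-zero return from
// the __tgt_target* call) or directly invokes the host-outlined function
// (ElseGen).
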
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

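// Illustrative example (hedged; assumed user code): with
//
//   #pragma omp declare target device_type(nohost)
//   void DeviceOnly();
//   #pragma omp end declare target
//
// the host pass above returns true for DeviceOnly() and skips emitting it,
// while the device pass emits it and additionally scans every reachable body
// for nested target regions.
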
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

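// Illustrative example (hedged; assumed user code):
//
//   #pragma omp requires unified_shared_memory atomic_default_mem_order(seq_cst)
//
// sets HasRequiresUnifiedSharedMemory and makes sequentially-consistent
// ordering the default returned by getDefaultMemoryOrdering() below.
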
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

9945 ASTContext &C = CGM.getContext();
9946
9947 llvm::Function *RequiresRegFn;
9948 {
9949 CodeGenFunction CGF(CGM);
9950 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9951 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9952 std::string ReqName = getName({"omp_offloading", "requires_reg"});
9953 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9954 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9955 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9956 // TODO: check for other requires clauses.
9957 // The requires directive takes effect only when a target region is
9958 // present in the compilation unit. Otherwise it is ignored and not
9959 // passed to the runtime. This prevents the runtime from throwing an
9960 // error for mismatching requires clauses across compilation units that
9961 // don't contain at least one target region.
9962 assert((HasEmittedTargetRegion ||
9963 HasEmittedDeclareTargetRegion ||
9964 !OffloadEntriesInfoManager.empty()) &&
9965 "Target or declare target region expected.");
9966 if (HasRequiresUnifiedSharedMemory)
9967 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9968 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9969 CGM.getModule(), OMPRTL___tgt_register_requires),
9970 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9971 CGF.FinishFunction();
9972 }
9973 return RequiresRegFn;
9974 }
9975
9976 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9977 const OMPExecutableDirective &D,
9978 SourceLocation Loc,
9979 llvm::Function *OutlinedFn,
9980 ArrayRef<llvm::Value *> CapturedVars) {
9981 if (!CGF.HaveInsertPoint())
9982 return;
9983
9984 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9985 CodeGenFunction::RunCleanupsScope Scope(CGF);
9986
9987 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9988 llvm::Value *Args[] = {
9989 RTLoc,
9990 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9991 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9992 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9993 RealArgs.append(std::begin(Args), std::end(Args));
9994 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9995
9996 llvm::FunctionCallee RTLFn =
9997 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9998 CGM.getModule(), OMPRTL___kmpc_fork_teams);
9999 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10000 }
10001
10002 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10003 const Expr *NumTeams,
10004 const Expr *ThreadLimit,
10005 SourceLocation Loc) {
10006 if (!CGF.HaveInsertPoint())
10007 return;
10008
10009 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10010
10011 llvm::Value *NumTeamsVal =
10012 NumTeams
10013 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10014 CGF.CGM.Int32Ty, /* isSigned = */ true)
10015 : CGF.Builder.getInt32(0);
10016
10017 llvm::Value *ThreadLimitVal =
10018 ThreadLimit
10019 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10020 CGF.CGM.Int32Ty, /* isSigned = */ true)
10021 : CGF.Builder.getInt32(0);
10022
10023 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10024 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10025 ThreadLimitVal};
10026 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10027 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10028 PushNumTeamsArgs);
10029 }
10030
10031 void CGOpenMPRuntime::emitTargetDataCalls(
10032 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10033 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10034 if (!CGF.HaveInsertPoint())
10035 return;
10036
10037 // Action used to replace the default codegen action and turn privatization
10038 // off.
10039 PrePostActionTy NoPrivAction;
10040
10041 // Generate the code for the opening of the data environment. Capture all the
10042 // arguments of the runtime call by reference because they are used in the
10043 // closing of the region.
10044 auto &&BeginThenGen = [this, &D, Device, &Info,
10045 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10046 // Fill up the arrays with all the mapped variables.
10047 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10048 MappableExprsHandler::MapValuesArrayTy Pointers;
10049 MappableExprsHandler::MapValuesArrayTy Sizes;
10050 MappableExprsHandler::MapFlagsArrayTy MapTypes;
10051
10052 // Get map clause information.
10053 MappableExprsHandler MCHandler(D, CGF);
10054 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10055
10056 // Fill up the arrays and create the arguments.
10057 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10058
10059 llvm::Value *BasePointersArrayArg = nullptr;
10060 llvm::Value *PointersArrayArg = nullptr;
10061 llvm::Value *SizesArrayArg = nullptr;
10062 llvm::Value *MapTypesArrayArg = nullptr;
10063 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10064 SizesArrayArg, MapTypesArrayArg, Info);
10065
10066 // Emit device ID if any.
10067 llvm::Value *DeviceID = nullptr;
10068 if (Device) {
10069 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10070 CGF.Int64Ty, /*isSigned=*/true);
10071 } else {
10072 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10073 }
10074
10075 // Emit the number of elements in the offloading arrays.
10076 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10077
10078 llvm::Value *OffloadingArgs[] = {
10079 DeviceID, PointerNum, BasePointersArrayArg,
10080 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10081 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10082 CGM.getModule(), OMPRTL___tgt_target_data_begin),
10083 OffloadingArgs);
10084
10085 // If device pointer privatization is required, emit the body of the region
10086 // here. It will have to be duplicated: with and without privatization.
10087 if (!Info.CaptureDeviceAddrMap.empty())
10088 CodeGen(CGF);
10089 };
10090
10091 // Generate code for the closing of the data region.
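// Taken together with BeginThenGen above, the emitted code has this shape
// (illustrative sketch; device_id defaults to OMP_DEVICEID_UNDEF when no
// 'device' clause is present):
//
//   __tgt_target_data_begin(device_id, n, base_ptrs, ptrs, sizes, map_types);
//   <region body>
//   __tgt_target_data_end(device_id, n, base_ptrs, ptrs, sizes, map_types);
//
// When an 'if' clause is present, both calls are guarded by the same
// condition, and the body may be duplicated to support device pointer
// privatization.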
10092 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10093 PrePostActionTy &) { 10094 assert(Info.isValid() && "Invalid data environment closing arguments."); 10095 10096 llvm::Value *BasePointersArrayArg = nullptr; 10097 llvm::Value *PointersArrayArg = nullptr; 10098 llvm::Value *SizesArrayArg = nullptr; 10099 llvm::Value *MapTypesArrayArg = nullptr; 10100 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10101 SizesArrayArg, MapTypesArrayArg, Info); 10102 10103 // Emit device ID if any. 10104 llvm::Value *DeviceID = nullptr; 10105 if (Device) { 10106 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10107 CGF.Int64Ty, /*isSigned=*/true); 10108 } else { 10109 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10110 } 10111 10112 // Emit the number of elements in the offloading arrays. 10113 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10114 10115 llvm::Value *OffloadingArgs[] = { 10116 DeviceID, PointerNum, BasePointersArrayArg, 10117 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10118 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10119 CGM.getModule(), OMPRTL___tgt_target_data_end), 10120 OffloadingArgs); 10121 }; 10122 10123 // If we need device pointer privatization, we need to emit the body of the 10124 // region with no privatization in the 'else' branch of the conditional. 10125 // Otherwise, we don't have to do anything. 10126 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10127 PrePostActionTy &) { 10128 if (!Info.CaptureDeviceAddrMap.empty()) { 10129 CodeGen.setAction(NoPrivAction); 10130 CodeGen(CGF); 10131 } 10132 }; 10133 10134 // We don't have to do anything to close the region if the if clause evaluates 10135 // to false. 10136 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10137 10138 if (IfCond) { 10139 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10140 } else { 10141 RegionCodeGenTy RCG(BeginThenGen); 10142 RCG(CGF); 10143 } 10144 10145 // If we don't require privatization of device pointers, we emit the body in 10146 // between the runtime calls. This avoids duplicating the body code. 10147 if (Info.CaptureDeviceAddrMap.empty()) { 10148 CodeGen.setAction(NoPrivAction); 10149 CodeGen(CGF); 10150 } 10151 10152 if (IfCond) { 10153 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10154 } else { 10155 RegionCodeGenTy RCG(EndThenGen); 10156 RCG(CGF); 10157 } 10158 } 10159 10160 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10161 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10162 const Expr *Device) { 10163 if (!CGF.HaveInsertPoint()) 10164 return; 10165 10166 assert((isa<OMPTargetEnterDataDirective>(D) || 10167 isa<OMPTargetExitDataDirective>(D) || 10168 isa<OMPTargetUpdateDirective>(D)) && 10169 "Expecting either target enter, exit data, or update directives."); 10170 10171 CodeGenFunction::OMPTargetDataInfo InputInfo; 10172 llvm::Value *MapTypesArray = nullptr; 10173 // Generate the code for the opening of the data environment. 10174 auto &&ThenGen = [this, &D, Device, &InputInfo, 10175 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10176 // Emit device ID if any. 
10177 llvm::Value *DeviceID = nullptr; 10178 if (Device) { 10179 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10180 CGF.Int64Ty, /*isSigned=*/true); 10181 } else { 10182 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10183 } 10184 10185 // Emit the number of elements in the offloading arrays. 10186 llvm::Constant *PointerNum = 10187 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10188 10189 llvm::Value *OffloadingArgs[] = {DeviceID, 10190 PointerNum, 10191 InputInfo.BasePointersArray.getPointer(), 10192 InputInfo.PointersArray.getPointer(), 10193 InputInfo.SizesArray.getPointer(), 10194 MapTypesArray}; 10195 10196 // Select the right runtime function call for each expected standalone 10197 // directive. 10198 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10199 RuntimeFunction RTLFn; 10200 switch (D.getDirectiveKind()) { 10201 case OMPD_target_enter_data: 10202 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait 10203 : OMPRTL___tgt_target_data_begin; 10204 break; 10205 case OMPD_target_exit_data: 10206 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait 10207 : OMPRTL___tgt_target_data_end; 10208 break; 10209 case OMPD_target_update: 10210 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait 10211 : OMPRTL___tgt_target_data_update; 10212 break; 10213 case OMPD_parallel: 10214 case OMPD_for: 10215 case OMPD_parallel_for: 10216 case OMPD_parallel_master: 10217 case OMPD_parallel_sections: 10218 case OMPD_for_simd: 10219 case OMPD_parallel_for_simd: 10220 case OMPD_cancel: 10221 case OMPD_cancellation_point: 10222 case OMPD_ordered: 10223 case OMPD_threadprivate: 10224 case OMPD_allocate: 10225 case OMPD_task: 10226 case OMPD_simd: 10227 case OMPD_sections: 10228 case OMPD_section: 10229 case OMPD_single: 10230 case OMPD_master: 10231 case OMPD_critical: 10232 case OMPD_taskyield: 10233 case OMPD_barrier: 10234 case OMPD_taskwait: 10235 case OMPD_taskgroup: 10236 case OMPD_atomic: 10237 case OMPD_flush: 10238 case OMPD_depobj: 10239 case OMPD_scan: 10240 case OMPD_teams: 10241 case OMPD_target_data: 10242 case OMPD_distribute: 10243 case OMPD_distribute_simd: 10244 case OMPD_distribute_parallel_for: 10245 case OMPD_distribute_parallel_for_simd: 10246 case OMPD_teams_distribute: 10247 case OMPD_teams_distribute_simd: 10248 case OMPD_teams_distribute_parallel_for: 10249 case OMPD_teams_distribute_parallel_for_simd: 10250 case OMPD_declare_simd: 10251 case OMPD_declare_variant: 10252 case OMPD_begin_declare_variant: 10253 case OMPD_end_declare_variant: 10254 case OMPD_declare_target: 10255 case OMPD_end_declare_target: 10256 case OMPD_declare_reduction: 10257 case OMPD_declare_mapper: 10258 case OMPD_taskloop: 10259 case OMPD_taskloop_simd: 10260 case OMPD_master_taskloop: 10261 case OMPD_master_taskloop_simd: 10262 case OMPD_parallel_master_taskloop: 10263 case OMPD_parallel_master_taskloop_simd: 10264 case OMPD_target: 10265 case OMPD_target_simd: 10266 case OMPD_target_teams_distribute: 10267 case OMPD_target_teams_distribute_simd: 10268 case OMPD_target_teams_distribute_parallel_for: 10269 case OMPD_target_teams_distribute_parallel_for_simd: 10270 case OMPD_target_teams: 10271 case OMPD_target_parallel: 10272 case OMPD_target_parallel_for: 10273 case OMPD_target_parallel_for_simd: 10274 case OMPD_requires: 10275 case OMPD_unknown: 10276 llvm_unreachable("Unexpected standalone target data directive."); 10277 break; 10278 } 10279 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10280 CGM.getModule(), RTLFn), 
10281 OffloadingArgs);
10282 };
10283
10284 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10285 CodeGenFunction &CGF, PrePostActionTy &) {
10286 // Fill up the arrays with all the mapped variables.
10287 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10288 MappableExprsHandler::MapValuesArrayTy Pointers;
10289 MappableExprsHandler::MapValuesArrayTy Sizes;
10290 MappableExprsHandler::MapFlagsArrayTy MapTypes;
10291
10292 // Get map clause information.
10293 MappableExprsHandler MEHandler(D, CGF);
10294 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10295
10296 TargetDataInfo Info;
10297 // Fill up the arrays and create the arguments.
10298 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10299 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10300 Info.PointersArray, Info.SizesArray,
10301 Info.MapTypesArray, Info);
10302 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10303 InputInfo.BasePointersArray =
10304 Address(Info.BasePointersArray, CGM.getPointerAlign());
10305 InputInfo.PointersArray =
10306 Address(Info.PointersArray, CGM.getPointerAlign());
10307 InputInfo.SizesArray =
10308 Address(Info.SizesArray, CGM.getPointerAlign());
10309 MapTypesArray = Info.MapTypesArray;
10310 if (D.hasClausesOfKind<OMPDependClause>())
10311 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10312 else
10313 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10314 };
10315
10316 if (IfCond) {
10317 emitIfClause(CGF, IfCond, TargetThenGen,
10318 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10319 } else {
10320 RegionCodeGenTy ThenRCG(TargetThenGen);
10321 ThenRCG(CGF);
10322 }
10323 }
10324
10325 namespace {
10326 /// Kind of parameter in a function with 'declare simd' directive.
10327 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10328 /// Attribute set of the parameter.
10329 struct ParamAttrTy {
10330 ParamKindTy Kind = Vector;
10331 llvm::APSInt StrideOrArg;
10332 llvm::APSInt Alignment;
10333 };
10334 } // namespace
10335
10336 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10337 ArrayRef<ParamAttrTy> ParamAttrs) {
10338 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10339 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10340 // argument of that clause. The VLEN value must be a power of 2.
10341 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10342 // is used to compute the vector length.
10343 // The CDT is defined in the following order:
10344 // a) For a non-void function, the CDT is the return type.
10345 // b) If the function has any non-uniform, non-linear parameters, then the
10346 // CDT is the type of the first such parameter.
10347 // c) If the CDT determined by a) or b) above is a struct, union, or class
10348 // type that is passed by value (except for a type that maps to the
10349 // built-in complex data type), the characteristic data type is int.
10350 // d) If none of the above three cases is applicable, the CDT is int.
10351 // The VLEN is then determined based on the CDT and the size of the vector
10352 // register of the ISA for which the current vector version is generated.
10353 // The VLEN is computed using the formula below:
10354 // VLEN = sizeof(vector_register) / sizeof(CDT),
10355 // where the vector register size is specified in section 3.2.1, Registers
10356 // and the Stack Frame, of the original AMD64 ABI document.
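// Worked example (illustrative, assuming C linkage): for `#pragma omp
// declare simd` on `double foo(double x);` the CDT is `double` by rule a),
// so for the AVX ISA ('c', 256-bit registers) VLEN = 256 / 64 = 4 and the
// x86 mangling below produces a variant name such as "_ZGVcN4v_foo".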
10357 QualType RetType = FD->getReturnType();
10358 if (RetType.isNull())
10359 return 0;
10360 ASTContext &C = FD->getASTContext();
10361 QualType CDT;
10362 if (!RetType.isNull() && !RetType->isVoidType()) {
10363 CDT = RetType;
10364 } else {
10365 unsigned Offset = 0;
10366 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10367 if (ParamAttrs[Offset].Kind == Vector)
10368 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10369 ++Offset;
10370 }
10371 if (CDT.isNull()) {
10372 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10373 if (ParamAttrs[I + Offset].Kind == Vector) {
10374 CDT = FD->getParamDecl(I)->getType();
10375 break;
10376 }
10377 }
10378 }
10379 }
10380 if (CDT.isNull())
10381 CDT = C.IntTy;
10382 CDT = CDT->getCanonicalTypeUnqualified();
10383 if (CDT->isRecordType() || CDT->isUnionType())
10384 CDT = C.IntTy;
10385 return C.getTypeSize(CDT);
10386 }
10387
10388 static void
10389 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10390 const llvm::APSInt &VLENVal,
10391 ArrayRef<ParamAttrTy> ParamAttrs,
10392 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10393 struct ISADataTy {
10394 char ISA;
10395 unsigned VecRegSize;
10396 };
10397 ISADataTy ISAData[] = {
10398 {'b', 128}, // SSE
10401 {'c', 256}, // AVX
10404 {'d', 256}, // AVX2
10407 {'e', 512}, // AVX512
10410 };
10411 llvm::SmallVector<char, 2> Masked;
10412 switch (State) {
10413 case OMPDeclareSimdDeclAttr::BS_Undefined:
10414 Masked.push_back('N');
10415 Masked.push_back('M');
10416 break;
10417 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10418 Masked.push_back('N');
10419 break;
10420 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10421 Masked.push_back('M');
10422 break;
10423 }
10424 for (char Mask : Masked) {
10425 for (const ISADataTy &Data : ISAData) {
10426 SmallString<256> Buffer;
10427 llvm::raw_svector_ostream Out(Buffer);
10428 Out << "_ZGV" << Data.ISA << Mask;
10429 if (!VLENVal) {
10430 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10431 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10432 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10433 } else {
10434 Out << VLENVal;
10435 }
10436 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10437 switch (ParamAttr.Kind) {
10438 case LinearWithVarStride:
10439 Out << 's' << ParamAttr.StrideOrArg;
10440 break;
10441 case Linear:
10442 Out << 'l';
10443 if (ParamAttr.StrideOrArg != 1)
10444 Out << ParamAttr.StrideOrArg;
10445 break;
10446 case Uniform:
10447 Out << 'u';
10448 break;
10449 case Vector:
10450 Out << 'v';
10451 break;
10452 }
10453 if (!!ParamAttr.Alignment)
10454 Out << 'a' << ParamAttr.Alignment;
10455 }
10456 Out << '_' << Fn->getName();
10457 Fn->addFnAttr(Out.str());
10458 }
10459 }
10460 }
10461
10462 // These are the functions needed to mangle the names of the
10463 // vector functions generated by the compiler, according to the rules
10464 // defined in the "Vector Function ABI specifications for AArch64",
10465 // available at
10466 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10467
10468 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10469 ///
10470 /// TODO: Need to implement the behavior for references marked with a
10471 /// var or no linear modifiers (1.b in the section). For this, we
10472 /// need to extend ParamKindTy to support the linear modifiers.
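/// For example (illustrative): given `#pragma omp declare simd uniform(p)`
/// on `double foo(double *p, double x);`, `p` is classified Uniform (MTV
/// false) while `x` is classified Vector (MTV true), so only `x` maps to a
/// vector of lanes in the generated variant.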
10473 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10474 QT = QT.getCanonicalType();
10475
10476 if (QT->isVoidType())
10477 return false;
10478
10479 if (Kind == ParamKindTy::Uniform)
10480 return false;
10481
10482 if (Kind == ParamKindTy::Linear)
10483 return false;
10484
10485 // TODO: Handle linear references with modifiers
10486
10487 if (Kind == ParamKindTy::LinearWithVarStride)
10488 return false;
10489
10490 return true;
10491 }
10492
10493 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10494 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10495 QT = QT.getCanonicalType();
10496 unsigned Size = C.getTypeSize(QT);
10497
10498 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10499 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10500 return false;
10501
10502 if (QT->isFloatingType())
10503 return true;
10504
10505 if (QT->isIntegerType())
10506 return true;
10507
10508 if (QT->isPointerType())
10509 return true;
10510
10511 // TODO: Add support for complex types (section 3.1.2, item 2).
10512
10513 return false;
10514 }
10515
10516 /// Computes the lane size (LS) of a return type or of an input parameter,
10517 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10518 /// TODO: Add support for references, section 3.2.1, item 1.
10519 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10520 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10521 QualType PTy = QT.getCanonicalType()->getPointeeType();
10522 if (getAArch64PBV(PTy, C))
10523 return C.getTypeSize(PTy);
10524 }
10525 if (getAArch64PBV(QT, C))
10526 return C.getTypeSize(QT);
10527
10528 return C.getTypeSize(C.getUIntPtrType());
10529 }
10530
10531 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10532 // signature of the scalar function, as defined in 3.2.2 of the
10533 // AAVFABI.
10534 static std::tuple<unsigned, unsigned, bool>
10535 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10536 QualType RetType = FD->getReturnType().getCanonicalType();
10537
10538 ASTContext &C = FD->getASTContext();
10539
10540 bool OutputBecomesInput = false;
10541
10542 llvm::SmallVector<unsigned, 8> Sizes;
10543 if (!RetType->isVoidType()) {
10544 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10545 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10546 OutputBecomesInput = true;
10547 }
10548 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10549 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10550 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10551 }
10552
10553 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10554 // The LS of a function parameter / return value can only be a power
10555 // of 2, starting from 8 bits, up to 128.
10556 assert(std::all_of(Sizes.begin(), Sizes.end(),
10557 [](unsigned Size) {
10558 return Size == 8 || Size == 16 || Size == 32 ||
10559 Size == 64 || Size == 128;
10560 }) &&
10561 "Invalid size");
10562
10563 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10564 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10565 OutputBecomesInput);
10566 }
10567
10568 /// Mangle the parameter part of the vector function name according to
10569 /// their OpenMP classification. The mangling function is defined in
10570 /// section 3.5 of the AAVFABI.
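/// For example (illustrative): parameters classified as {Uniform, Linear
/// with stride 4, Vector aligned to 16 bytes} mangle to "ul4va16" using the
/// switch below ('u', then 'l' plus the stride, then 'v' plus "a16").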
10571 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10572 SmallString<256> Buffer; 10573 llvm::raw_svector_ostream Out(Buffer); 10574 for (const auto &ParamAttr : ParamAttrs) { 10575 switch (ParamAttr.Kind) { 10576 case LinearWithVarStride: 10577 Out << "ls" << ParamAttr.StrideOrArg; 10578 break; 10579 case Linear: 10580 Out << 'l'; 10581 // Don't print the step value if it is not present or if it is 10582 // equal to 1. 10583 if (ParamAttr.StrideOrArg != 1) 10584 Out << ParamAttr.StrideOrArg; 10585 break; 10586 case Uniform: 10587 Out << 'u'; 10588 break; 10589 case Vector: 10590 Out << 'v'; 10591 break; 10592 } 10593 10594 if (!!ParamAttr.Alignment) 10595 Out << 'a' << ParamAttr.Alignment; 10596 } 10597 10598 return std::string(Out.str()); 10599 } 10600 10601 // Function used to add the attribute. The parameter `VLEN` is 10602 // templated to allow the use of "x" when targeting scalable functions 10603 // for SVE. 10604 template <typename T> 10605 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10606 char ISA, StringRef ParSeq, 10607 StringRef MangledName, bool OutputBecomesInput, 10608 llvm::Function *Fn) { 10609 SmallString<256> Buffer; 10610 llvm::raw_svector_ostream Out(Buffer); 10611 Out << Prefix << ISA << LMask << VLEN; 10612 if (OutputBecomesInput) 10613 Out << "v"; 10614 Out << ParSeq << "_" << MangledName; 10615 Fn->addFnAttr(Out.str()); 10616 } 10617 10618 // Helper function to generate the Advanced SIMD names depending on 10619 // the value of the NDS when simdlen is not present. 10620 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10621 StringRef Prefix, char ISA, 10622 StringRef ParSeq, StringRef MangledName, 10623 bool OutputBecomesInput, 10624 llvm::Function *Fn) { 10625 switch (NDS) { 10626 case 8: 10627 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10628 OutputBecomesInput, Fn); 10629 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10630 OutputBecomesInput, Fn); 10631 break; 10632 case 16: 10633 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10634 OutputBecomesInput, Fn); 10635 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10636 OutputBecomesInput, Fn); 10637 break; 10638 case 32: 10639 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10640 OutputBecomesInput, Fn); 10641 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10642 OutputBecomesInput, Fn); 10643 break; 10644 case 64: 10645 case 128: 10646 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10647 OutputBecomesInput, Fn); 10648 break; 10649 default: 10650 llvm_unreachable("Scalar type is too wide."); 10651 } 10652 } 10653 10654 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 10655 static void emitAArch64DeclareSimdFunction( 10656 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10657 ArrayRef<ParamAttrTy> ParamAttrs, 10658 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10659 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10660 10661 // Get basic data for building the vector signature. 10662 const auto Data = getNDSWDS(FD, ParamAttrs); 10663 const unsigned NDS = std::get<0>(Data); 10664 const unsigned WDS = std::get<1>(Data); 10665 const bool OutputBecomesInput = std::get<2>(Data); 10666 10667 // Check the values provided via `simdlen` by the user. 10668 // 1. 
A `simdlen(1)` doesn't produce vector signatures.
10669 if (UserVLEN == 1) {
10670 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10671 DiagnosticsEngine::Warning,
10672 "The clause simdlen(1) has no effect when targeting aarch64.");
10673 CGM.getDiags().Report(SLoc, DiagID);
10674 return;
10675 }
10676
10677 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10678 // Advanced SIMD output.
10679 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10680 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10681 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10682 "power of 2 when targeting Advanced SIMD.");
10683 CGM.getDiags().Report(SLoc, DiagID);
10684 return;
10685 }
10686
10687 // 3. Section 3.4.1. SVE fixed length must obey the architectural
10688 // limits.
10689 if (ISA == 's' && UserVLEN != 0) {
10690 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10691 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10692 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10693 "lanes in the architectural constraints "
10694 "for SVE (min is 128-bit, max is "
10695 "2048-bit, in steps of 128-bit)");
10696 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10697 return;
10698 }
10699 }
10700
10701 // Sort out the parameter sequence.
10702 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10703 StringRef Prefix = "_ZGV";
10704 // Generate simdlen from user input (if any).
10705 if (UserVLEN) {
10706 if (ISA == 's') {
10707 // SVE generates only a masked function.
10708 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10709 OutputBecomesInput, Fn);
10710 } else {
10711 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10712 // Advanced SIMD generates one or two functions, depending on
10713 // the `[not]inbranch` clause.
10714 switch (State) {
10715 case OMPDeclareSimdDeclAttr::BS_Undefined:
10716 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10717 OutputBecomesInput, Fn);
10718 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10719 OutputBecomesInput, Fn);
10720 break;
10721 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10722 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10723 OutputBecomesInput, Fn);
10724 break;
10725 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10726 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10727 OutputBecomesInput, Fn);
10728 break;
10729 }
10730 }
10731 } else {
10732 // If no user simdlen is provided, follow the AAVFABI rules for
10733 // generating the vector length.
10734 if (ISA == 's') {
10735 // SVE, section 3.4.1, item 1.
10736 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10737 OutputBecomesInput, Fn);
10738 } else {
10739 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10740 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10741 // two vector names depending on the use of the clause
10742 // `[not]inbranch`.
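// For example (illustrative, assuming C linkage): for `float foo(float);`
// (NDS = 32) with no simdlen and no branch-state clause, the NDS rules
// below emit both the 2-lane and 4-lane variants in unmasked and masked
// forms: "_ZGVnN2v_foo", "_ZGVnN4v_foo", "_ZGVnM2v_foo" and "_ZGVnM4v_foo".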
10743 switch (State) { 10744 case OMPDeclareSimdDeclAttr::BS_Undefined: 10745 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10746 OutputBecomesInput, Fn); 10747 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10748 OutputBecomesInput, Fn); 10749 break; 10750 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10751 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10752 OutputBecomesInput, Fn); 10753 break; 10754 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10755 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10756 OutputBecomesInput, Fn); 10757 break; 10758 } 10759 } 10760 } 10761 } 10762 10763 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10764 llvm::Function *Fn) { 10765 ASTContext &C = CGM.getContext(); 10766 FD = FD->getMostRecentDecl(); 10767 // Map params to their positions in function decl. 10768 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10769 if (isa<CXXMethodDecl>(FD)) 10770 ParamPositions.try_emplace(FD, 0); 10771 unsigned ParamPos = ParamPositions.size(); 10772 for (const ParmVarDecl *P : FD->parameters()) { 10773 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10774 ++ParamPos; 10775 } 10776 while (FD) { 10777 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10778 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10779 // Mark uniform parameters. 10780 for (const Expr *E : Attr->uniforms()) { 10781 E = E->IgnoreParenImpCasts(); 10782 unsigned Pos; 10783 if (isa<CXXThisExpr>(E)) { 10784 Pos = ParamPositions[FD]; 10785 } else { 10786 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10787 ->getCanonicalDecl(); 10788 Pos = ParamPositions[PVD]; 10789 } 10790 ParamAttrs[Pos].Kind = Uniform; 10791 } 10792 // Get alignment info. 10793 auto NI = Attr->alignments_begin(); 10794 for (const Expr *E : Attr->aligneds()) { 10795 E = E->IgnoreParenImpCasts(); 10796 unsigned Pos; 10797 QualType ParmTy; 10798 if (isa<CXXThisExpr>(E)) { 10799 Pos = ParamPositions[FD]; 10800 ParmTy = E->getType(); 10801 } else { 10802 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10803 ->getCanonicalDecl(); 10804 Pos = ParamPositions[PVD]; 10805 ParmTy = PVD->getType(); 10806 } 10807 ParamAttrs[Pos].Alignment = 10808 (*NI) 10809 ? (*NI)->EvaluateKnownConstInt(C) 10810 : llvm::APSInt::getUnsigned( 10811 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10812 .getQuantity()); 10813 ++NI; 10814 } 10815 // Mark linear parameters. 10816 auto SI = Attr->steps_begin(); 10817 auto MI = Attr->modifiers_begin(); 10818 for (const Expr *E : Attr->linears()) { 10819 E = E->IgnoreParenImpCasts(); 10820 unsigned Pos; 10821 // Rescaling factor needed to compute the linear parameter 10822 // value in the mangled name. 10823 unsigned PtrRescalingFactor = 1; 10824 if (isa<CXXThisExpr>(E)) { 10825 Pos = ParamPositions[FD]; 10826 } else { 10827 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10828 ->getCanonicalDecl(); 10829 Pos = ParamPositions[PVD]; 10830 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10831 PtrRescalingFactor = CGM.getContext() 10832 .getTypeSizeInChars(P->getPointeeType()) 10833 .getQuantity(); 10834 } 10835 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10836 ParamAttr.Kind = Linear; 10837 // Assuming a stride of 1, for `linear` without modifiers. 
10838 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10839 if (*SI) {
10840 Expr::EvalResult Result;
10841 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10842 if (const auto *DRE =
10843 dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10844 if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
10845 ParamAttr.Kind = LinearWithVarStride;
10846 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10847 ParamPositions[StridePVD->getCanonicalDecl()]);
10848 }
10849 }
10850 } else {
10851 ParamAttr.StrideOrArg = Result.Val.getInt();
10852 }
10853 }
10854 // If we are using a linear clause on a pointer, we need to
10855 // rescale the value of linear_step with the byte size of the
10856 // pointee type.
10857 if (Linear == ParamAttr.Kind)
10858 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10859 ++SI;
10860 ++MI;
10861 }
10862 llvm::APSInt VLENVal;
10863 SourceLocation ExprLoc;
10864 const Expr *VLENExpr = Attr->getSimdlen();
10865 if (VLENExpr) {
10866 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10867 ExprLoc = VLENExpr->getExprLoc();
10868 }
10869 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10870 if (CGM.getTriple().isX86()) {
10871 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10872 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10873 unsigned VLEN = VLENVal.getExtValue();
10874 StringRef MangledName = Fn->getName();
10875 if (CGM.getTarget().hasFeature("sve"))
10876 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10877 MangledName, 's', 128, Fn, ExprLoc);
10878 if (CGM.getTarget().hasFeature("neon"))
10879 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10880 MangledName, 'n', 128, Fn, ExprLoc);
10881 }
10882 }
10883 FD = FD->getPreviousDecl();
10884 }
10885 }
10886
10887 namespace {
10888 /// Cleanup action for doacross support.
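/// The cleanup pairs every __kmpc_doacross_init emitted in emitDoacrossInit
/// below with a matching __kmpc_doacross_fini on scope exit, so the runtime
/// bookkeeping stays balanced even when the region is left early.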
10889 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10890 public: 10891 static const int DoacrossFinArgs = 2; 10892 10893 private: 10894 llvm::FunctionCallee RTLFn; 10895 llvm::Value *Args[DoacrossFinArgs]; 10896 10897 public: 10898 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10899 ArrayRef<llvm::Value *> CallArgs) 10900 : RTLFn(RTLFn) { 10901 assert(CallArgs.size() == DoacrossFinArgs); 10902 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10903 } 10904 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10905 if (!CGF.HaveInsertPoint()) 10906 return; 10907 CGF.EmitRuntimeCall(RTLFn, Args); 10908 } 10909 }; 10910 } // namespace 10911 10912 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10913 const OMPLoopDirective &D, 10914 ArrayRef<Expr *> NumIterations) { 10915 if (!CGF.HaveInsertPoint()) 10916 return; 10917 10918 ASTContext &C = CGM.getContext(); 10919 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10920 RecordDecl *RD; 10921 if (KmpDimTy.isNull()) { 10922 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10923 // kmp_int64 lo; // lower 10924 // kmp_int64 up; // upper 10925 // kmp_int64 st; // stride 10926 // }; 10927 RD = C.buildImplicitRecord("kmp_dim"); 10928 RD->startDefinition(); 10929 addFieldToRecordDecl(C, RD, Int64Ty); 10930 addFieldToRecordDecl(C, RD, Int64Ty); 10931 addFieldToRecordDecl(C, RD, Int64Ty); 10932 RD->completeDefinition(); 10933 KmpDimTy = C.getRecordType(RD); 10934 } else { 10935 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10936 } 10937 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10938 QualType ArrayTy = 10939 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10940 10941 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10942 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10943 enum { LowerFD = 0, UpperFD, StrideFD }; 10944 // Fill dims with data. 
10945 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10946 LValue DimsLVal = CGF.MakeAddrLValue( 10947 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10948 // dims.upper = num_iterations; 10949 LValue UpperLVal = CGF.EmitLValueForField( 10950 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10951 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 10952 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 10953 Int64Ty, NumIterations[I]->getExprLoc()); 10954 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10955 // dims.stride = 1; 10956 LValue StrideLVal = CGF.EmitLValueForField( 10957 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10958 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10959 StrideLVal); 10960 } 10961 10962 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10963 // kmp_int32 num_dims, struct kmp_dim * dims); 10964 llvm::Value *Args[] = { 10965 emitUpdateLocation(CGF, D.getBeginLoc()), 10966 getThreadID(CGF, D.getBeginLoc()), 10967 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10968 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10969 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10970 CGM.VoidPtrTy)}; 10971 10972 llvm::FunctionCallee RTLFn = 10973 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10974 CGM.getModule(), OMPRTL___kmpc_doacross_init); 10975 CGF.EmitRuntimeCall(RTLFn, Args); 10976 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10977 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10978 llvm::FunctionCallee FiniRTLFn = 10979 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10980 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 10981 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10982 llvm::makeArrayRef(FiniArgs)); 10983 } 10984 10985 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10986 const OMPDependClause *C) { 10987 QualType Int64Ty = 10988 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10989 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10990 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10991 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10992 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10993 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10994 const Expr *CounterVal = C->getLoopData(I); 10995 assert(CounterVal); 10996 llvm::Value *CntVal = CGF.EmitScalarConversion( 10997 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10998 CounterVal->getExprLoc()); 10999 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11000 /*Volatile=*/false, Int64Ty); 11001 } 11002 llvm::Value *Args[] = { 11003 emitUpdateLocation(CGF, C->getBeginLoc()), 11004 getThreadID(CGF, C->getBeginLoc()), 11005 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11006 llvm::FunctionCallee RTLFn; 11007 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11008 RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11009 CGM.getModule(), OMPRTL___kmpc_doacross_post); 11010 } else { 11011 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11012 RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11013 CGM.getModule(), OMPRTL___kmpc_doacross_wait); 11014 } 11015 CGF.EmitRuntimeCall(RTLFn, Args); 11016 } 11017 11018 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11019 llvm::FunctionCallee Callee, 11020 ArrayRef<llvm::Value *> Args) const { 
11021 assert(Loc.isValid() && "Outlined function call location must be valid.");
11022 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11023
11024 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11025 if (Fn->doesNotThrow()) {
11026 CGF.EmitNounwindRuntimeCall(Fn, Args);
11027 return;
11028 }
11029 }
11030 CGF.EmitRuntimeCall(Callee, Args);
11031 }
11032
11033 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11034 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11035 ArrayRef<llvm::Value *> Args) const {
11036 emitCall(CGF, Loc, OutlinedFn, Args);
11037 }
11038
11039 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11040 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11041 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11042 HasEmittedDeclareTargetRegion = true;
11043 }
11044
11045 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11046 const VarDecl *NativeParam,
11047 const VarDecl *TargetParam) const {
11048 return CGF.GetAddrOfLocalVar(NativeParam);
11049 }
11050
11051 namespace {
11052 /// Cleanup action for allocate support.
11053 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11054 public:
11055 static const int CleanupArgs = 3;
11056
11057 private:
11058 llvm::FunctionCallee RTLFn;
11059 llvm::Value *Args[CleanupArgs];
11060
11061 public:
11062 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11063 ArrayRef<llvm::Value *> CallArgs)
11064 : RTLFn(RTLFn) {
11065 assert(CallArgs.size() == CleanupArgs &&
11066 "Size of arguments does not match.");
11067 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11068 }
11069 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11070 if (!CGF.HaveInsertPoint())
11071 return;
11072 CGF.EmitRuntimeCall(RTLFn, Args);
11073 }
11074 };
11075 } // namespace
11076
11077 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11078 const VarDecl *VD) {
11079 if (!VD)
11080 return Address::invalid();
11081 const VarDecl *CVD = VD->getCanonicalDecl();
11082 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11083 return Address::invalid();
11084 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11085 // Use the default allocation.
11086 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11087 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11088 !AA->getAllocator())
11089 return Address::invalid();
11090 llvm::Value *Size;
11091 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11092 if (CVD->getType()->isVariablyModifiedType()) {
11093 Size = CGF.getTypeSize(CVD->getType());
11094 // Align the size: ((size + align - 1) / align) * align
11095 Size = CGF.Builder.CreateNUWAdd(
11096 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11097 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11098 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11099 } else {
11100 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11101 Size = CGM.getSize(Sz.alignTo(Align));
11102 }
11103 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11104 assert(AA->getAllocator() &&
11105 "Expected allocator expression for non-default allocator.");
11106 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11107 // According to the standard, the original allocator type is an enum
11108 // (integer). Convert to pointer type, if required.
11109 if (Allocator->getType()->isIntegerTy()) 11110 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11111 else if (Allocator->getType()->isPointerTy()) 11112 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11113 CGM.VoidPtrTy); 11114 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11115 11116 llvm::Value *Addr = 11117 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11118 CGM.getModule(), OMPRTL___kmpc_alloc), 11119 Args, getName({CVD->getName(), ".void.addr"})); 11120 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11121 Allocator}; 11122 llvm::FunctionCallee FiniRTLFn = 11123 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(), 11124 OMPRTL___kmpc_free); 11125 11126 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11127 llvm::makeArrayRef(FiniArgs)); 11128 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11129 Addr, 11130 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11131 getName({CVD->getName(), ".addr"})); 11132 return Address(Addr, Align); 11133 } 11134 11135 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11136 CodeGenModule &CGM, const OMPLoopDirective &S) 11137 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11138 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11139 if (!NeedToPush) 11140 return; 11141 NontemporalDeclsSet &DS = 11142 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11143 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11144 for (const Stmt *Ref : C->private_refs()) { 11145 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11146 const ValueDecl *VD; 11147 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11148 VD = DRE->getDecl(); 11149 } else { 11150 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11151 assert((ME->isImplicitCXXThis() || 11152 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11153 "Expected member of current class."); 11154 VD = ME->getMemberDecl(); 11155 } 11156 DS.insert(VD); 11157 } 11158 } 11159 } 11160 11161 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11162 if (!NeedToPush) 11163 return; 11164 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11165 } 11166 11167 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11168 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11169 11170 return llvm::any_of( 11171 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11172 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11173 } 11174 11175 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11176 const OMPExecutableDirective &S, 11177 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11178 const { 11179 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11180 // Vars in target/task regions must be excluded completely. 
11181 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11182 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11183 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11184 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11185 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11186 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11187 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11188 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11189 } 11190 } 11191 // Exclude vars in private clauses. 11192 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11193 for (const Expr *Ref : C->varlists()) { 11194 if (!Ref->getType()->isScalarType()) 11195 continue; 11196 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11197 if (!DRE) 11198 continue; 11199 NeedToCheckForLPCs.insert(DRE->getDecl()); 11200 } 11201 } 11202 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11203 for (const Expr *Ref : C->varlists()) { 11204 if (!Ref->getType()->isScalarType()) 11205 continue; 11206 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11207 if (!DRE) 11208 continue; 11209 NeedToCheckForLPCs.insert(DRE->getDecl()); 11210 } 11211 } 11212 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11213 for (const Expr *Ref : C->varlists()) { 11214 if (!Ref->getType()->isScalarType()) 11215 continue; 11216 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11217 if (!DRE) 11218 continue; 11219 NeedToCheckForLPCs.insert(DRE->getDecl()); 11220 } 11221 } 11222 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11223 for (const Expr *Ref : C->varlists()) { 11224 if (!Ref->getType()->isScalarType()) 11225 continue; 11226 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11227 if (!DRE) 11228 continue; 11229 NeedToCheckForLPCs.insert(DRE->getDecl()); 11230 } 11231 } 11232 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11233 for (const Expr *Ref : C->varlists()) { 11234 if (!Ref->getType()->isScalarType()) 11235 continue; 11236 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11237 if (!DRE) 11238 continue; 11239 NeedToCheckForLPCs.insert(DRE->getDecl()); 11240 } 11241 } 11242 for (const Decl *VD : NeedToCheckForLPCs) { 11243 for (const LastprivateConditionalData &Data : 11244 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11245 if (Data.DeclToUniqueName.count(VD) > 0) { 11246 if (!Data.Disabled) 11247 NeedToAddForLPCsAsDisabled.insert(VD); 11248 break; 11249 } 11250 } 11251 } 11252 } 11253 11254 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11255 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11256 : CGM(CGF.CGM), 11257 Action((CGM.getLangOpts().OpenMP >= 50 && 11258 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11259 [](const OMPLastprivateClause *C) { 11260 return C->getKind() == 11261 OMPC_LASTPRIVATE_conditional; 11262 })) 11263 ? 
ActionToDo::PushAsLastprivateConditional
11264 : ActionToDo::DoNotPush) {
11265 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11266 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11267 return;
11268 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11269 "Expected a push action.");
11270 LastprivateConditionalData &Data =
11271 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11272 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11273 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11274 continue;
11275
11276 for (const Expr *Ref : C->varlists()) {
11277 Data.DeclToUniqueName.insert(std::make_pair(
11278 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11279 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11280 }
11281 }
11282 Data.IVLVal = IVLVal;
11283 Data.Fn = CGF.CurFn;
11284 }
11285
11286 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11287 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11288 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11289 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11290 if (CGM.getLangOpts().OpenMP < 50)
11291 return;
11292 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11293 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11294 if (!NeedToAddForLPCsAsDisabled.empty()) {
11295 Action = ActionToDo::DisableLastprivateConditional;
11296 LastprivateConditionalData &Data =
11297 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11298 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11299 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11300 Data.Fn = CGF.CurFn;
11301 Data.Disabled = true;
11302 }
11303 }
11304
11305 CGOpenMPRuntime::LastprivateConditionalRAII
11306 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11307 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11308 return LastprivateConditionalRAII(CGF, S);
11309 }
11310
11311 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11312 if (CGM.getLangOpts().OpenMP < 50)
11313 return;
11314 if (Action == ActionToDo::DisableLastprivateConditional) {
11315 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11316 "Expected list of disabled private vars.");
11317 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11318 }
11319 if (Action == ActionToDo::PushAsLastprivateConditional) {
11320 assert(
11321 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11322 "Expected list of lastprivate conditional vars.");
11323 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11324 }
11325 }
11326
11327 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11328 const VarDecl *VD) {
11329 ASTContext &C = CGM.getContext();
11330 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11331 if (I == LastprivateConditionalToTypes.end())
11332 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11333 QualType NewType;
11334 const FieldDecl *VDField;
11335 const FieldDecl *FiredField;
11336 LValue BaseLVal;
11337 auto VI = I->getSecond().find(VD);
11338 if (VI == I->getSecond().end()) {
11339 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11340 RD->startDefinition();
11341 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11342 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11343 RD->completeDefinition();
11344
NewType = C.getRecordType(RD); 11345 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11346 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11347 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11348 } else { 11349 NewType = std::get<0>(VI->getSecond()); 11350 VDField = std::get<1>(VI->getSecond()); 11351 FiredField = std::get<2>(VI->getSecond()); 11352 BaseLVal = std::get<3>(VI->getSecond()); 11353 } 11354 LValue FiredLVal = 11355 CGF.EmitLValueForField(BaseLVal, FiredField); 11356 CGF.EmitStoreOfScalar( 11357 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11358 FiredLVal); 11359 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11360 } 11361 11362 namespace { 11363 /// Checks if the lastprivate conditional variable is referenced in LHS. 11364 class LastprivateConditionalRefChecker final 11365 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11366 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11367 const Expr *FoundE = nullptr; 11368 const Decl *FoundD = nullptr; 11369 StringRef UniqueDeclName; 11370 LValue IVLVal; 11371 llvm::Function *FoundFn = nullptr; 11372 SourceLocation Loc; 11373 11374 public: 11375 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11376 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11377 llvm::reverse(LPM)) { 11378 auto It = D.DeclToUniqueName.find(E->getDecl()); 11379 if (It == D.DeclToUniqueName.end()) 11380 continue; 11381 if (D.Disabled) 11382 return false; 11383 FoundE = E; 11384 FoundD = E->getDecl()->getCanonicalDecl(); 11385 UniqueDeclName = It->second; 11386 IVLVal = D.IVLVal; 11387 FoundFn = D.Fn; 11388 break; 11389 } 11390 return FoundE == E; 11391 } 11392 bool VisitMemberExpr(const MemberExpr *E) { 11393 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11394 return false; 11395 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11396 llvm::reverse(LPM)) { 11397 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11398 if (It == D.DeclToUniqueName.end()) 11399 continue; 11400 if (D.Disabled) 11401 return false; 11402 FoundE = E; 11403 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11404 UniqueDeclName = It->second; 11405 IVLVal = D.IVLVal; 11406 FoundFn = D.Fn; 11407 break; 11408 } 11409 return FoundE == E; 11410 } 11411 bool VisitStmt(const Stmt *S) { 11412 for (const Stmt *Child : S->children()) { 11413 if (!Child) 11414 continue; 11415 if (const auto *E = dyn_cast<Expr>(Child)) 11416 if (!E->isGLValue()) 11417 continue; 11418 if (Visit(Child)) 11419 return true; 11420 } 11421 return false; 11422 } 11423 explicit LastprivateConditionalRefChecker( 11424 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11425 : LPM(LPM) {} 11426 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11427 getFoundData() const { 11428 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11429 } 11430 }; 11431 } // namespace 11432 11433 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11434 LValue IVLVal, 11435 StringRef UniqueDeclName, 11436 LValue LVal, 11437 SourceLocation Loc) { 11438 // Last updated loop counter for the lastprivate conditional var. 
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }
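
  // Note on the branch above (an illustrative sketch; the nesting shown is
  // an assumption for exposition):
  //   #pragma omp parallel for lastprivate(conditional: a) // owns priv_a
  //   for (int i = 0; i < N; ++i) {
  //     #pragma omp parallel        // inner region, different CurFn
  //     { if (...) a = ...; }       // only priv_a.Fired is set, atomically
  //   }
  // An inner region cannot see the outer iteration variable, so it merely
  // marks the copy as updated; checkAndEmitSharedLastprivateConditional in
  // the outer region performs the actual last_iv/last_a update when it sees
  // the flag set.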
  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
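
// Conceptual effect of the final update below (a sketch; 'a', 'priv_a' and
// 'last_a' are names assumed for exposition): once the construct finishes,
// the privatized copy is refreshed from the internal "last value" global
// before the regular lastprivate copy-out runs:
//   if (<internal global for 'a' was ever created>)
//     priv_a = last_a;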
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}