1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 
52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 
289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel) 421 : CGF(CGF) { 422 // Start emission for the construct. 423 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 424 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 427 CGF.LambdaThisCaptureField = nullptr; 428 BlockInfo = CGF.BlockInfo; 429 CGF.BlockInfo = nullptr; 430 } 431 432 ~InlinedOpenMPRegionRAII() { 433 // Restore original CapturedStmtInfo only if we're done with code emission. 
434 auto *OldCSI = 435 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 436 delete CGF.CapturedStmtInfo; 437 CGF.CapturedStmtInfo = OldCSI; 438 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 439 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 440 CGF.BlockInfo = BlockInfo; 441 } 442 }; 443 444 /// Values for bit flags used in the ident_t to describe the fields. 445 /// All enumeric elements are named and described in accordance with the code 446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 447 enum OpenMPLocationFlags : unsigned { 448 /// Use trampoline for internal microtask. 449 OMP_IDENT_IMD = 0x01, 450 /// Use c-style ident structure. 451 OMP_IDENT_KMPC = 0x02, 452 /// Atomic reduction option for kmpc_reduce. 453 OMP_ATOMIC_REDUCE = 0x10, 454 /// Explicit 'barrier' directive. 455 OMP_IDENT_BARRIER_EXPL = 0x20, 456 /// Implicit barrier in code. 457 OMP_IDENT_BARRIER_IMPL = 0x40, 458 /// Implicit barrier in 'for' directive. 459 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 460 /// Implicit barrier in 'sections' directive. 461 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 462 /// Implicit barrier in 'single' directive. 463 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 464 /// Call of __kmp_for_static_init for static loop. 465 OMP_IDENT_WORK_LOOP = 0x200, 466 /// Call of __kmp_for_static_init for sections. 467 OMP_IDENT_WORK_SECTIONS = 0x400, 468 /// Call of __kmp_for_static_init for distribute. 469 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 470 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 471 }; 472 473 namespace { 474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 475 /// Values for bit flags for marking which requires clauses have been used. 476 enum OpenMPOffloadingRequiresDirFlags : int64_t { 477 /// flag undefined. 478 OMP_REQ_UNDEFINED = 0x000, 479 /// no requires clause present. 480 OMP_REQ_NONE = 0x001, 481 /// reverse_offload clause. 
482 OMP_REQ_REVERSE_OFFLOAD = 0x002, 483 /// unified_address clause. 484 OMP_REQ_UNIFIED_ADDRESS = 0x004, 485 /// unified_shared_memory clause. 486 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 487 /// dynamic_allocators clause. 488 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 489 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 490 }; 491 492 enum OpenMPOffloadingReservedDeviceIDs { 493 /// Device ID if the device was not defined, runtime should get it 494 /// from environment variables in the spec. 495 OMP_DEVICEID_UNDEF = -1, 496 }; 497 } // anonymous namespace 498 499 /// Describes ident structure that describes a source location. 500 /// All descriptions are taken from 501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 502 /// Original structure: 503 /// typedef struct ident { 504 /// kmp_int32 reserved_1; /**< might be used in Fortran; 505 /// see above */ 506 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 507 /// KMP_IDENT_KMPC identifies this union 508 /// member */ 509 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 510 /// see above */ 511 ///#if USE_ITT_BUILD 512 /// /* but currently used for storing 513 /// region-specific ITT */ 514 /// /* contextual information. */ 515 ///#endif /* USE_ITT_BUILD */ 516 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 517 /// C++ */ 518 /// char const *psource; /**< String describing the source location. 519 /// The string is composed of semi-colon separated 520 // fields which describe the source file, 521 /// the function and a pair of line numbers that 522 /// delimit the construct. 523 /// */ 524 /// } ident_t; 525 enum IdentFieldIndex { 526 /// might be used in Fortran 527 IdentField_Reserved_1, 528 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
529 IdentField_Flags, 530 /// Not really used in Fortran any more 531 IdentField_Reserved_2, 532 /// Source[4] in Fortran, do not use for C++ 533 IdentField_Reserved_3, 534 /// String describing the source location. The string is composed of 535 /// semi-colon separated fields which describe the source file, the function 536 /// and a pair of line numbers that delimit the construct. 537 IdentField_PSource 538 }; 539 540 /// Schedule types for 'omp for' loops (these enumerators are taken from 541 /// the enum sched_type in kmp.h). 542 enum OpenMPSchedType { 543 /// Lower bound for default (unordered) versions. 544 OMP_sch_lower = 32, 545 OMP_sch_static_chunked = 33, 546 OMP_sch_static = 34, 547 OMP_sch_dynamic_chunked = 35, 548 OMP_sch_guided_chunked = 36, 549 OMP_sch_runtime = 37, 550 OMP_sch_auto = 38, 551 /// static with chunk adjustment (e.g., simd) 552 OMP_sch_static_balanced_chunked = 45, 553 /// Lower bound for 'ordered' versions. 554 OMP_ord_lower = 64, 555 OMP_ord_static_chunked = 65, 556 OMP_ord_static = 66, 557 OMP_ord_dynamic_chunked = 67, 558 OMP_ord_guided_chunked = 68, 559 OMP_ord_runtime = 69, 560 OMP_ord_auto = 70, 561 OMP_sch_default = OMP_sch_static, 562 /// dist_schedule types 563 OMP_dist_sch_static_chunked = 91, 564 OMP_dist_sch_static = 92, 565 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 566 /// Set if the monotonic schedule modifier was present. 567 OMP_sch_modifier_monotonic = (1 << 29), 568 /// Set if the nonmonotonic schedule modifier was present. 569 OMP_sch_modifier_nonmonotonic = (1 << 30), 570 }; 571 572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 573 /// region. 
574 class CleanupTy final : public EHScopeStack::Cleanup { 575 PrePostActionTy *Action; 576 577 public: 578 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 579 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 580 if (!CGF.HaveInsertPoint()) 581 return; 582 Action->Exit(CGF); 583 } 584 }; 585 586 } // anonymous namespace 587 588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 589 CodeGenFunction::RunCleanupsScope Scope(CGF); 590 if (PrePostAction) { 591 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 592 Callback(CodeGen, CGF, *PrePostAction); 593 } else { 594 PrePostActionTy Action; 595 Callback(CodeGen, CGF, Action); 596 } 597 } 598 599 /// Check if the combiner is a call to UDR combiner and if it is so return the 600 /// UDR decl used for reduction. 601 static const OMPDeclareReductionDecl * 602 getReductionInit(const Expr *ReductionOp) { 603 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 604 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 605 if (const auto *DRE = 606 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 607 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 608 return DRD; 609 return nullptr; 610 } 611 612 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 613 const OMPDeclareReductionDecl *DRD, 614 const Expr *InitOp, 615 Address Private, Address Original, 616 QualType Ty) { 617 if (DRD->getInitializer()) { 618 std::pair<llvm::Function *, llvm::Function *> Reduction = 619 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 620 const auto *CE = cast<CallExpr>(InitOp); 621 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 622 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 623 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 624 const auto *LHSDRE = 625 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 626 const auto *RHSDRE = 627 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 628 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 629 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 630 [=]() { return Private; }); 631 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 632 [=]() { return Original; }); 633 (void)PrivateScope.Privatize(); 634 RValue Func = RValue::get(Reduction.second); 635 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 636 CGF.EmitIgnoredExpr(InitOp); 637 } else { 638 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 639 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 640 auto *GV = new llvm::GlobalVariable( 641 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 642 llvm::GlobalValue::PrivateLinkage, Init, Name); 643 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 644 RValue InitRVal; 645 switch (CGF.getEvaluationKind(Ty)) { 646 case TEK_Scalar: 647 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 648 break; 649 case TEK_Complex: 650 InitRVal = 651 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 652 break; 653 case TEK_Aggregate: 654 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 655 break; 656 } 657 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 658 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 659 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 660 /*IsInitializer=*/false); 661 } 662 } 663 664 /// Emit initialization of arrays of complex types. 665 /// \param DestAddr Address of the array. 666 /// \param Type Type of array. 667 /// \param Init Initial expression of array. 668 /// \param SrcAddr Address of the original array. 669 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 670 QualType Type, bool EmitDeclareReductionInit, 671 const Expr *Init, 672 const OMPDeclareReductionDecl *DRD, 673 Address SrcAddr = Address::invalid()) { 674 // Perform element-by-element initialization. 
675 QualType ElementTy; 676 677 // Drill down to the base element type on both arrays. 678 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 679 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 680 DestAddr = 681 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 682 if (DRD) 683 SrcAddr = 684 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 685 686 llvm::Value *SrcBegin = nullptr; 687 if (DRD) 688 SrcBegin = SrcAddr.getPointer(); 689 llvm::Value *DestBegin = DestAddr.getPointer(); 690 // Cast from pointer to array type to pointer to single element. 691 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 692 // The basic structure here is a while-do loop. 693 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 694 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 695 llvm::Value *IsEmpty = 696 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 697 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 698 699 // Enter the loop body, making that address the current address. 
700 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 701 CGF.EmitBlock(BodyBB); 702 703 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 704 705 llvm::PHINode *SrcElementPHI = nullptr; 706 Address SrcElementCurrent = Address::invalid(); 707 if (DRD) { 708 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 709 "omp.arraycpy.srcElementPast"); 710 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 711 SrcElementCurrent = 712 Address(SrcElementPHI, 713 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 714 } 715 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 716 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 717 DestElementPHI->addIncoming(DestBegin, EntryBB); 718 Address DestElementCurrent = 719 Address(DestElementPHI, 720 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 721 722 // Emit copy. 723 { 724 CodeGenFunction::RunCleanupsScope InitScope(CGF); 725 if (EmitDeclareReductionInit) { 726 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 727 SrcElementCurrent, ElementTy); 728 } else 729 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 730 /*IsInitializer=*/false); 731 } 732 733 if (DRD) { 734 // Shift the address forward by one element. 735 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 736 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 737 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 738 } 739 740 // Shift the address forward by one element. 741 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 742 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 743 // Check whether we've reached the end. 744 llvm::Value *Done = 745 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 746 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 747 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 748 749 // Done. 
750 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 751 } 752 753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 754 return CGF.EmitOMPSharedLValue(E); 755 } 756 757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 758 const Expr *E) { 759 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 760 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 761 return LValue(); 762 } 763 764 void ReductionCodeGen::emitAggregateInitialization( 765 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 766 const OMPDeclareReductionDecl *DRD) { 767 // Emit VarDecl with copy init for arrays. 768 // Get the address of the original variable captured in current 769 // captured region. 770 const auto *PrivateVD = 771 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 772 bool EmitDeclareReductionInit = 773 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 774 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 775 EmitDeclareReductionInit, 776 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 777 : PrivateVD->getInit(), 778 DRD, SharedLVal.getAddress(CGF)); 779 } 780 781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 782 ArrayRef<const Expr *> Origs, 783 ArrayRef<const Expr *> Privates, 784 ArrayRef<const Expr *> ReductionOps) { 785 ClausesData.reserve(Shareds.size()); 786 SharedAddresses.reserve(Shareds.size()); 787 Sizes.reserve(Shareds.size()); 788 BaseDecls.reserve(Shareds.size()); 789 const auto *IOrig = Origs.begin(); 790 const auto *IPriv = Privates.begin(); 791 const auto *IRed = ReductionOps.begin(); 792 for (const Expr *Ref : Shareds) { 793 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 794 std::advance(IOrig, 1); 795 std::advance(IPriv, 1); 796 std::advance(IRed, 1); 797 } 798 } 799 800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 801 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 802 "Number of generated lvalues must be exactly N."); 803 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 804 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 805 SharedAddresses.emplace_back(First, Second); 806 if (ClausesData[N].Shared == ClausesData[N].Ref) { 807 OrigAddresses.emplace_back(First, Second); 808 } else { 809 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 810 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 811 OrigAddresses.emplace_back(First, Second); 812 } 813 } 814 815 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 816 const auto *PrivateVD = 817 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 818 QualType PrivateType = PrivateVD->getType(); 819 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 820 if (!PrivateType->isVariablyModifiedType()) { 821 Sizes.emplace_back( 822 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 823 nullptr); 824 return; 825 } 826 llvm::Value *Size; 827 llvm::Value 
*SizeInChars; 828 auto *ElemType = 829 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 830 ->getElementType(); 831 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 832 if (AsArraySection) { 833 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 834 OrigAddresses[N].first.getPointer(CGF)); 835 Size = CGF.Builder.CreateNUWAdd( 836 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 837 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 838 } else { 839 SizeInChars = 840 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 841 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 842 } 843 Sizes.emplace_back(SizeInChars, Size); 844 CodeGenFunction::OpaqueValueMapping OpaqueMap( 845 CGF, 846 cast<OpaqueValueExpr>( 847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 848 RValue::get(Size)); 849 CGF.EmitVariablyModifiedType(PrivateType); 850 } 851 852 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 853 llvm::Value *Size) { 854 const auto *PrivateVD = 855 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 856 QualType PrivateType = PrivateVD->getType(); 857 if (!PrivateType->isVariablyModifiedType()) { 858 assert(!Size && !Sizes[N].second && 859 "Size should be nullptr for non-variably modified reduction " 860 "items."); 861 return; 862 } 863 CodeGenFunction::OpaqueValueMapping OpaqueMap( 864 CGF, 865 cast<OpaqueValueExpr>( 866 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 867 RValue::get(Size)); 868 CGF.EmitVariablyModifiedType(PrivateType); 869 } 870 871 void ReductionCodeGen::emitInitialization( 872 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 873 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 874 assert(SharedAddresses.size() > N && "No variable was generated"); 875 const auto *PrivateVD = 876 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 877 const OMPDeclareReductionDecl *DRD = 878 getReductionInit(ClausesData[N].ReductionOp); 879 QualType PrivateType = PrivateVD->getType(); 880 PrivateAddr = CGF.Builder.CreateElementBitCast( 881 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 882 QualType SharedType = SharedAddresses[N].first.getType(); 883 SharedLVal = CGF.MakeAddrLValue( 884 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 885 CGF.ConvertTypeForMem(SharedType)), 886 SharedType, SharedAddresses[N].first.getBaseInfo(), 887 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 888 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 889 if (DRD && DRD->getInitializer()) 890 (void)DefaultInit(CGF); 891 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 892 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 893 (void)DefaultInit(CGF); 894 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 895 PrivateAddr, SharedLVal.getAddress(CGF), 896 SharedLVal.getType()); 897 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 898 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 899 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 900 PrivateVD->getType().getQualifiers(), 901 /*IsInitializer=*/false); 902 } 903 } 904 905 bool ReductionCodeGen::needCleanups(unsigned N) { 906 const auto *PrivateVD = 907 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 908 QualType PrivateType = PrivateVD->getType(); 909 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 910 return DTorKind != QualType::DK_none; 911 } 912 913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 914 Address PrivateAddr) { 915 const auto *PrivateVD = 916 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 917 QualType PrivateType = PrivateVD->getType(); 918 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 919 if (needCleanups(N)) { 920 PrivateAddr = CGF.Builder.CreateElementBitCast( 921 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 922 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 923 } 924 } 925 926 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 927 LValue BaseLV) { 928 BaseTy = BaseTy.getNonReferenceType(); 929 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 930 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 931 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 932 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 933 } else { 934 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 935 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 936 } 937 BaseTy = BaseTy->getPointeeType(); 938 } 939 return CGF.MakeAddrLValue( 940 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 941 CGF.ConvertTypeForMem(ElTy)), 942 BaseLV.getType(), BaseLV.getBaseInfo(), 943 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 944 } 945 946 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 947 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 948 llvm::Value *Addr) { 949 Address Tmp = Address::invalid(); 950 Address TopTmp = Address::invalid(); 951 Address MostTopTmp = Address::invalid(); 952 BaseTy = BaseTy.getNonReferenceType(); 953 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 954 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 955 Tmp = CGF.CreateMemTemp(BaseTy); 956 if (TopTmp.isValid()) 957 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 958 else 959 MostTopTmp = Tmp; 960 TopTmp = Tmp; 961 BaseTy = BaseTy->getPointeeType(); 962 } 963 llvm::Type *Ty = BaseLVType; 964 if (Tmp.isValid()) 965 Ty = Tmp.getElementType(); 966 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 967 if (Tmp.isValid()) { 968 CGF.Builder.CreateStore(Addr, Tmp); 969 return MostTopTmp; 970 } 971 return 
Address(Addr, BaseLVAlignment); 972 } 973 974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 975 const VarDecl *OrigVD = nullptr; 976 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 977 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 978 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 979 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 981 Base = TempASE->getBase()->IgnoreParenImpCasts(); 982 DE = cast<DeclRefExpr>(Base); 983 OrigVD = cast<VarDecl>(DE->getDecl()); 984 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 985 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 986 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 987 Base = TempASE->getBase()->IgnoreParenImpCasts(); 988 DE = cast<DeclRefExpr>(Base); 989 OrigVD = cast<VarDecl>(DE->getDecl()); 990 } 991 return OrigVD; 992 } 993 994 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 995 Address PrivateAddr) { 996 const DeclRefExpr *DE; 997 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 998 BaseDecls.emplace_back(OrigVD); 999 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1000 LValue BaseLValue = 1001 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1002 OriginalBaseLValue); 1003 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1004 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1005 llvm::Value *PrivatePointer = 1006 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1007 PrivateAddr.getPointer(), 1008 SharedAddresses[N].first.getAddress(CGF).getType()); 1009 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1010 return castToBase(CGF, OrigVD->getType(), 1011 SharedAddresses[N].first.getType(), 1012 OriginalBaseLValue.getAddress(CGF).getType(), 1013 OriginalBaseLValue.getAlignment(), Ptr); 1014 } 1015 
BaseDecls.emplace_back( 1016 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1017 return PrivateAddr; 1018 } 1019 1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1021 const OMPDeclareReductionDecl *DRD = 1022 getReductionInit(ClausesData[N].ReductionOp); 1023 return DRD && DRD->getInitializer(); 1024 } 1025 1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1027 return CGF.EmitLoadOfPointerLValue( 1028 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1029 getThreadIDVariable()->getType()->castAs<PointerType>()); 1030 } 1031 1032 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1033 if (!CGF.HaveInsertPoint()) 1034 return; 1035 // 1.2.2 OpenMP Language Terminology 1036 // Structured block - An executable statement with a single entry at the 1037 // top and a single exit at the bottom. 1038 // The point of exit cannot be a branch out of the structured block. 1039 // longjmp() and throw() must not violate the entry/exit criteria. 
1040 CGF.EHStack.pushTerminate(); 1041 CodeGen(CGF); 1042 CGF.EHStack.popTerminate(); 1043 } 1044 1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1046 CodeGenFunction &CGF) { 1047 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1048 getThreadIDVariable()->getType(), 1049 AlignmentSource::Decl); 1050 } 1051 1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1053 QualType FieldTy) { 1054 auto *Field = FieldDecl::Create( 1055 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1056 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1057 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1058 Field->setAccess(AS_public); 1059 DC->addDecl(Field); 1060 return Field; 1061 } 1062 1063 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1064 StringRef Separator) 1065 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1066 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1067 ASTContext &C = CGM.getContext(); 1068 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1069 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1070 RD->startDefinition(); 1071 // reserved_1 1072 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1073 // flags 1074 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1075 // reserved_2 1076 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1077 // reserved_3 1078 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1079 // psource 1080 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1081 RD->completeDefinition(); 1082 IdentQTy = C.getRecordType(RD); 1083 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1084 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1085 1086 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1087 OMPBuilder.initialize(); 1088 loadOffloadInfoMetadata(); 1089 } 1090 1091 void CGOpenMPRuntime::clear() { 1092 InternalVars.clear(); 1093 // Clean non-target 
variable declarations possibly used only in debug info. 1094 for (const auto &Data : EmittedNonTargetVariables) { 1095 if (!Data.getValue().pointsToAliveValue()) 1096 continue; 1097 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1098 if (!GV) 1099 continue; 1100 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1101 continue; 1102 GV->eraseFromParent(); 1103 } 1104 } 1105 1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1107 SmallString<128> Buffer; 1108 llvm::raw_svector_ostream OS(Buffer); 1109 StringRef Sep = FirstSeparator; 1110 for (StringRef Part : Parts) { 1111 OS << Sep << Part; 1112 Sep = Separator; 1113 } 1114 return std::string(OS.str()); 1115 } 1116 1117 static llvm::Function * 1118 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1119 const Expr *CombinerInitializer, const VarDecl *In, 1120 const VarDecl *Out, bool IsCombiner) { 1121 // void .omp_combiner.(Ty *in, Ty *out); 1122 ASTContext &C = CGM.getContext(); 1123 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1124 FunctionArgList Args; 1125 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1126 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1127 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1128 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1129 Args.push_back(&OmpOutParm); 1130 Args.push_back(&OmpInParm); 1131 const CGFunctionInfo &FnInfo = 1132 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1133 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1134 std::string Name = CGM.getOpenMPRuntime().getName( 1135 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1136 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1137 Name, &CGM.getModule()); 1138 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1139 if (CGM.getLangOpts().Optimize) { 1140 Fn->removeFnAttr(llvm::Attribute::NoInline); 1141 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1142 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1143 } 1144 CodeGenFunction CGF(CGM); 1145 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1146 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1148 Out->getLocation()); 1149 CodeGenFunction::OMPPrivateScope Scope(CGF); 1150 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1151 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1152 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1153 .getAddress(CGF); 1154 }); 1155 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1156 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1157 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1158 .getAddress(CGF); 1159 }); 1160 (void)Scope.Privatize(); 1161 if (!IsCombiner && Out->hasInit() && 1162 !CGF.isTrivialInitializer(Out->getInit())) { 1163 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1164 Out->getType().getQualifiers(), 1165 /*IsInitializer=*/true); 1166 } 1167 if (CombinerInitializer) 1168 CGF.EmitIgnoredExpr(CombinerInitializer); 1169 Scope.ForceCleanup(); 1170 CGF.FinishFunction(); 1171 return Fn; 1172 } 1173 1174 void CGOpenMPRuntime::emitUserDefinedReduction( 1175 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1176 if (UDRMap.count(D) > 0) 1177 return; 1178 llvm::Function *Combiner = emitCombinerOrInitializer( 1179 CGM, D->getType(), D->getCombiner(), 1180 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1181 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1182 /*IsCombiner=*/true); 1183 llvm::Function *Initializer = nullptr; 1184 if (const Expr *Init = D->getInitializer()) { 1185 Initializer = emitCombinerOrInitializer( 1186 CGM, D->getType(), 1187 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1188 : nullptr, 1189 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1190 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1191 /*IsCombiner=*/false); 1192 } 1193 UDRMap.try_emplace(D, Combiner, Initializer); 1194 if (CGF) { 1195 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1196 Decls.second.push_back(D); 1197 } 1198 } 1199 1200 std::pair<llvm::Function *, llvm::Function *> 1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1202 auto I = UDRMap.find(D); 1203 if (I != UDRMap.end()) 1204 return I->second; 1205 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1206 return UDRMap.lookup(D); 1207 } 1208 1209 namespace { 1210 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1211 // Builder if one is present. 1212 struct PushAndPopStackRAII { 1213 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1214 bool HasCancel) 1215 : OMPBuilder(OMPBuilder) { 1216 if (!OMPBuilder) 1217 return; 1218 1219 // The following callback is the crucial part of clangs cleanup process. 1220 // 1221 // NOTE: 1222 // Once the OpenMPIRBuilder is used to create parallel regions (and 1223 // similar), the cancellation destination (Dest below) is determined via 1224 // IP. That means if we have variables to finalize we split the block at IP, 1225 // use the new block (=BB) as destination to build a JumpDest (via 1226 // getJumpDestInCurrentScope(BB)) which then is fed to 1227 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1228 // to push & pop an FinalizationInfo object. 
1229 // The FiniCB will still be needed but at the point where the 1230 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1231 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1232 assert(IP.getBlock()->end() == IP.getPoint() && 1233 "Clang CG should cause non-terminated block!"); 1234 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1235 CGF.Builder.restoreIP(IP); 1236 CodeGenFunction::JumpDest Dest = 1237 CGF.getOMPCancelDestination(OMPD_parallel); 1238 CGF.EmitBranchThroughCleanup(Dest); 1239 }; 1240 1241 // TODO: Remove this once we emit parallel regions through the 1242 // OpenMPIRBuilder as it can do this setup internally. 1243 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1244 {FiniCB, OMPD_parallel, HasCancel}); 1245 OMPBuilder->pushFinalizationCB(std::move(FI)); 1246 } 1247 ~PushAndPopStackRAII() { 1248 if (OMPBuilder) 1249 OMPBuilder->popFinalizationCB(); 1250 } 1251 llvm::OpenMPIRBuilder *OMPBuilder; 1252 }; 1253 } // namespace 1254 1255 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1256 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1257 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1258 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1259 assert(ThreadIDVar->getType()->isPointerType() && 1260 "thread id variable must be of type kmp_int32 *"); 1261 CodeGenFunction CGF(CGM, true); 1262 bool HasCancel = false; 1263 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1264 HasCancel = OPD->hasCancel(); 1265 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1266 HasCancel = OPD->hasCancel(); 1267 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1268 HasCancel = OPSD->hasCancel(); 1269 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1270 HasCancel = OPFD->hasCancel(); 1271 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1272 HasCancel = 
OPFD->hasCancel(); 1273 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1274 HasCancel = OPFD->hasCancel(); 1275 else if (const auto *OPFD = 1276 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1277 HasCancel = OPFD->hasCancel(); 1278 else if (const auto *OPFD = 1279 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1280 HasCancel = OPFD->hasCancel(); 1281 1282 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1283 // parallel region to make cancellation barriers work properly. 1284 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1285 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1286 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1287 HasCancel, OutlinedHelperName); 1288 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1289 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1290 } 1291 1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1293 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1294 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1295 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1296 return emitParallelOrTeamsOutlinedFunction( 1297 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1298 } 1299 1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1301 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1302 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1303 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1304 return emitParallelOrTeamsOutlinedFunction( 1305 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1306 } 1307 1308 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1309 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1310 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1311 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1312 bool Tied, unsigned &NumberOfParts) { 1313 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1314 PrePostActionTy &) { 1315 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1316 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1317 llvm::Value *TaskArgs[] = { 1318 UpLoc, ThreadID, 1319 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1320 TaskTVar->getType()->castAs<PointerType>()) 1321 .getPointer(CGF)}; 1322 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1323 CGM.getModule(), OMPRTL___kmpc_omp_task), 1324 TaskArgs); 1325 }; 1326 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1327 UntiedCodeGen); 1328 CodeGen.setAction(Action); 1329 assert(!ThreadIDVar->getType()->isPointerType() && 1330 "thread id variable must be of type kmp_int32 for tasks"); 1331 const OpenMPDirectiveKind Region = 1332 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1333 : OMPD_task; 1334 const CapturedStmt *CS = D.getCapturedStmt(Region); 1335 bool HasCancel = false; 1336 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1337 HasCancel = TD->hasCancel(); 1338 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1339 HasCancel = TD->hasCancel(); 1340 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1341 HasCancel = TD->hasCancel(); 1342 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1343 HasCancel = TD->hasCancel(); 1344 1345 CodeGenFunction CGF(CGM, true); 1346 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1347 InnermostKind, HasCancel, Action); 1348 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1349 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1350 if (!Tied) 1351 NumberOfParts = Action.getNumberOfParts(); 1352 return Res; 1353 } 1354 1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1356 
const RecordDecl *RD, const CGRecordLayout &RL, 1357 ArrayRef<llvm::Constant *> Data) { 1358 llvm::StructType *StructTy = RL.getLLVMType(); 1359 unsigned PrevIdx = 0; 1360 ConstantInitBuilder CIBuilder(CGM); 1361 auto DI = Data.begin(); 1362 for (const FieldDecl *FD : RD->fields()) { 1363 unsigned Idx = RL.getLLVMFieldNo(FD); 1364 // Fill the alignment. 1365 for (unsigned I = PrevIdx; I < Idx; ++I) 1366 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1367 PrevIdx = Idx + 1; 1368 Fields.add(*DI); 1369 ++DI; 1370 } 1371 } 1372 1373 template <class... As> 1374 static llvm::GlobalVariable * 1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1376 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1377 As &&... Args) { 1378 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1379 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1380 ConstantInitBuilder CIBuilder(CGM); 1381 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1382 buildStructValue(Fields, CGM, RD, RL, Data); 1383 return Fields.finishAndCreateGlobal( 1384 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1385 std::forward<As>(Args)...); 1386 } 1387 1388 template <typename T> 1389 static void 1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1391 ArrayRef<llvm::Constant *> Data, 1392 T &Parent) { 1393 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1394 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1395 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1396 buildStructValue(Fields, CGM, RD, RL, Data); 1397 Fields.finishAndAddTo(Parent); 1398 } 1399 1400 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1401 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1402 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1403 FlagsTy FlagsKey(Flags, Reserved2Flags); 1404 llvm::Value *Entry = 
OpenMPDefaultLocMap.lookup(FlagsKey); 1405 if (!Entry) { 1406 if (!DefaultOpenMPPSource) { 1407 // Initialize default location for psource field of ident_t structure of 1408 // all ident_t objects. Format is ";file;function;line;column;;". 1409 // Taken from 1410 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1411 DefaultOpenMPPSource = 1412 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1413 DefaultOpenMPPSource = 1414 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1415 } 1416 1417 llvm::Constant *Data[] = { 1418 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1419 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1420 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1421 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1422 llvm::GlobalValue *DefaultOpenMPLocation = 1423 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1424 llvm::GlobalValue::PrivateLinkage); 1425 DefaultOpenMPLocation->setUnnamedAddr( 1426 llvm::GlobalValue::UnnamedAddr::Global); 1427 1428 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1429 } 1430 return Address(Entry, Align); 1431 } 1432 1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1434 bool AtCurrentPoint) { 1435 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1436 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1437 1438 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1439 if (AtCurrentPoint) { 1440 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1441 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1442 } else { 1443 Elem.second.ServiceInsertPt = 1444 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1445 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1446 } 1447 } 1448 1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1450 auto &Elem = 
OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1451 if (Elem.second.ServiceInsertPt) { 1452 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1453 Elem.second.ServiceInsertPt = nullptr; 1454 Ptr->eraseFromParent(); 1455 } 1456 } 1457 1458 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1459 SourceLocation Loc, 1460 unsigned Flags) { 1461 Flags |= OMP_IDENT_KMPC; 1462 // If no debug info is generated - return global default location. 1463 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1464 Loc.isInvalid()) 1465 return getOrCreateDefaultLocation(Flags).getPointer(); 1466 1467 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1468 1469 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1470 Address LocValue = Address::invalid(); 1471 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1472 if (I != OpenMPLocThreadIDMap.end()) 1473 LocValue = Address(I->second.DebugLoc, Align); 1474 1475 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1476 // GetOpenMPThreadID was called before this routine. 
1477 if (!LocValue.isValid()) { 1478 // Generate "ident_t .kmpc_loc.addr;" 1479 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1480 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1481 Elem.second.DebugLoc = AI.getPointer(); 1482 LocValue = AI; 1483 1484 if (!Elem.second.ServiceInsertPt) 1485 setLocThreadIdInsertPt(CGF); 1486 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1487 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1488 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1489 CGF.getTypeSize(IdentQTy)); 1490 } 1491 1492 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1493 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1494 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1495 LValue PSource = 1496 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1497 1498 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1499 if (OMPDebugLoc == nullptr) { 1500 SmallString<128> Buffer2; 1501 llvm::raw_svector_ostream OS2(Buffer2); 1502 // Build debug location 1503 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1504 OS2 << ";" << PLoc.getFilename() << ";"; 1505 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1506 OS2 << FD->getQualifiedNameAsString(); 1507 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1508 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1509 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1510 } 1511 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1512 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1513 1514 // Our callers always pass this to a runtime function, so for 1515 // convenience, go ahead and return a naked pointer. 
1516 return LocValue.getPointer(); 1517 } 1518 1519 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1520 SourceLocation Loc) { 1521 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1522 1523 llvm::Value *ThreadID = nullptr; 1524 // Check whether we've already cached a load of the thread id in this 1525 // function. 1526 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1527 if (I != OpenMPLocThreadIDMap.end()) { 1528 ThreadID = I->second.ThreadID; 1529 if (ThreadID != nullptr) 1530 return ThreadID; 1531 } 1532 // If exceptions are enabled, do not use parameter to avoid possible crash. 1533 if (auto *OMPRegionInfo = 1534 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1535 if (OMPRegionInfo->getThreadIDVariable()) { 1536 // Check if this an outlined function with thread id passed as argument. 1537 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1538 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1539 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1540 !CGF.getLangOpts().CXXExceptions || 1541 CGF.Builder.GetInsertBlock() == TopBlock || 1542 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1543 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1544 TopBlock || 1545 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1546 CGF.Builder.GetInsertBlock()) { 1547 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1548 // If value loaded in entry block, cache it and use it everywhere in 1549 // function. 1550 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1551 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1552 Elem.second.ThreadID = ThreadID; 1553 } 1554 return ThreadID; 1555 } 1556 } 1557 } 1558 1559 // This is not an outlined function region - need to call __kmpc_int32 1560 // kmpc_global_thread_num(ident_t *loc). 1561 // Generate thread id value and cache this value for use across the 1562 // function. 
1563 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1564 if (!Elem.second.ServiceInsertPt) 1565 setLocThreadIdInsertPt(CGF); 1566 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1567 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1568 llvm::CallInst *Call = CGF.Builder.CreateCall( 1569 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1570 OMPRTL___kmpc_global_thread_num), 1571 emitUpdateLocation(CGF, Loc)); 1572 Call->setCallingConv(CGF.getRuntimeCC()); 1573 Elem.second.ThreadID = Call; 1574 return Call; 1575 } 1576 1577 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1578 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1579 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1580 clearLocThreadIdInsertPt(CGF); 1581 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1582 } 1583 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1584 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1585 UDRMap.erase(D); 1586 FunctionUDRMap.erase(CGF.CurFn); 1587 } 1588 auto I = FunctionUDMMap.find(CGF.CurFn); 1589 if (I != FunctionUDMMap.end()) { 1590 for(const auto *D : I->second) 1591 UDMMap.erase(D); 1592 FunctionUDMMap.erase(I); 1593 } 1594 LastprivateConditionalToTypes.erase(CGF.CurFn); 1595 } 1596 1597 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1598 return IdentTy->getPointerTo(); 1599 } 1600 1601 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1602 if (!Kmpc_MicroTy) { 1603 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1604 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1605 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1606 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1607 } 1608 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1609 } 1610 1611 llvm::FunctionCallee 1612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1613 assert((IVSize == 32 || IVSize == 64) && 1614 "IV size is not compatible with the omp runtime"); 1615 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1616 : "__kmpc_for_static_init_4u") 1617 : (IVSigned ? "__kmpc_for_static_init_8" 1618 : "__kmpc_for_static_init_8u"); 1619 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1621 llvm::Type *TypeParams[] = { 1622 getIdentTyPointerTy(), // loc 1623 CGM.Int32Ty, // tid 1624 CGM.Int32Ty, // schedtype 1625 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1626 PtrTy, // p_lower 1627 PtrTy, // p_upper 1628 PtrTy, // p_stride 1629 ITy, // incr 1630 ITy // chunk 1631 }; 1632 auto *FnTy = 1633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1634 return CGM.CreateRuntimeFunction(FnTy, Name); 1635 } 1636 1637 llvm::FunctionCallee 1638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1639 assert((IVSize == 32 || IVSize == 64) && 1640 "IV size is not compatible with the omp runtime"); 1641 StringRef Name = 1642 IVSize == 32 1643 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1644 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1645 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1646 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1647 CGM.Int32Ty, // tid 1648 CGM.Int32Ty, // schedtype 1649 ITy, // lower 1650 ITy, // upper 1651 ITy, // stride 1652 ITy // chunk 1653 }; 1654 auto *FnTy = 1655 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1656 return CGM.CreateRuntimeFunction(FnTy, Name); 1657 } 1658 1659 llvm::FunctionCallee 1660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1661 assert((IVSize == 32 || IVSize == 64) && 1662 "IV size is not compatible with the omp runtime"); 1663 StringRef Name = 1664 IVSize == 32 1665 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1666 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1667 llvm::Type *TypeParams[] = { 1668 getIdentTyPointerTy(), // loc 1669 CGM.Int32Ty, // tid 1670 }; 1671 auto *FnTy = 1672 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1673 return CGM.CreateRuntimeFunction(FnTy, Name); 1674 } 1675 1676 llvm::FunctionCallee 1677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1678 assert((IVSize == 32 || IVSize == 64) && 1679 "IV size is not compatible with the omp runtime"); 1680 StringRef Name = 1681 IVSize == 32 1682 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1683 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1684 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  // Unlike the init/fini entry points, this one is declared as returning a
  // 32-bit integer.
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
/// \param C AST context that provides the source manager.
/// \param Loc Source location of the entry; asserted to be valid.
/// \param [out] DeviceID Device part of the file's filesystem unique ID.
/// \param [out] FileID File part of the file's filesystem unique ID.
/// \param [out] LineNum Presumed line number of \p Loc.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // A failure to query the unique ID is reported as a diagnostic, but the
  // function still falls through and reads ID below.
  // NOTE(review): the contents of ID after a failed getUniqueID call are not
  // established by this file - confirm they are well defined.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the "<mangled name>[_<file id>]_decl_tgt_ref_ptr"
/// pointer variable for a declare target variable mapped with MT_Link, or with
/// MT_To while HasRequiresUnifiedSharedMemory is set. The pointer variable is
/// created and registered on first request. Returns Address::invalid() for
/// any other variable and whenever OpenMPSimd is enabled.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Compose the name of the reference pointer. The stream is scoped so that
    // it is destroyed before PtrName is read.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Variables that are not externally visible get the file ID appended to
      // their name.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse a global of that name if the module already has one.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only when not compiling for the device is the pointer initialized with
      // the address of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal variable named after the mangled name of \p VD plus the
/// "cache" suffix. Asserts that the TLS-based implementation is not in use.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1769 std::string Suffix = getName({"cache", ""}); 1770 return getOrCreateInternalVariable( 1771 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1772 } 1773 1774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1775 const VarDecl *VD, 1776 Address VDAddr, 1777 SourceLocation Loc) { 1778 if (CGM.getLangOpts().OpenMPUseTLS && 1779 CGM.getContext().getTargetInfo().isTLSSupported()) 1780 return VDAddr; 1781 1782 llvm::Type *VarTy = VDAddr.getElementType(); 1783 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1784 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1785 CGM.Int8PtrTy), 1786 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1787 getOrCreateThreadPrivateCache(VD)}; 1788 return Address(CGF.EmitRuntimeCall( 1789 OMPBuilder.getOrCreateRuntimeFunction( 1790 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1791 Args), 1792 VDAddr.getAlignment()); 1793 } 1794 1795 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1796 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1797 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1798 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1799 // library. 1800 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1801 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1802 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1803 OMPLoc); 1804 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1805 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the helper functions needed to initialize and destroy the
/// threadprivate copies of \p VD and register them with the runtime.
/// \param VD The threadprivate variable; its definition is looked up first.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit When true (and compiling C++), a function that re-emits
/// the initializer is generated.
/// \param CGF If non-null, the registration call is emitted into this
/// function; if null, a separate "__omp_threadprivate_init_" function is
/// created for it.
/// \return The newly created init function when \p CGF is null and at least
/// one of the constructor/destructor helpers was generated; nullptr in every
/// other case (TLS in use, no definition, already handled, nothing to
/// register, or registration emitted into \p CGF).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() doubles as an "already processed" check: the body only runs
  // the first time a given mangled name is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the void* argument and view it as a pointer to the variable's
      // memory type before emitting the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The generated function returns the pointer it was given: reload the
      // argument and store it into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the two helpers was not generated is passed to the runtime
    // as a null function pointer of the matching type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration call in a
    // dedicated nullary init function and hand that back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offload targets are configured and this is not a
  // device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Variables that are not declare target, or that are mapped with MT_Link
  // (or MT_To while HasRequiresUnifiedSharedMemory is set), get no ctor/dtor
  // entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each mangled name is processed only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common prefix; Out is scratch storage used when the
  // "_ctor"/"_dtor" names are materialized for registration.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits the declaration's initializer into the
      // storage of the variable VD at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // When not compiling for the device no function is generated; a private
      // constant i8 global with the same "_ctor" name stands in for it.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the variable VD
      // stored at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Same placeholder scheme as for the constructor entry above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of the per-thread instance of an internal variable of
/// type \p VarType. The variable is named \p Name plus the "artificial"
/// suffix. With OpenMP TLS enabled and supported by the target, the global
/// itself is marked thread-local and returned; otherwise the address is
/// obtained through a __kmpc_threadprivate_cached call.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime call result is cast to a pointer to the variable's memory type
  // in address space 0 before being wrapped in an Address.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' condition: run \p ThenGen when \p Cond is
/// true and \p ElseGen otherwise.
/// \param CGF Function in which the code is emitted.
/// \param Cond The condition expression; must not be null (it is dereferenced
/// unconditionally).
/// \param ThenGen Code generator for the true arm.
/// \param ElseGen Code generator for the false arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is a discarded temporary,
  // so it is destroyed at the end of this statement, before EmitBlock runs.
  // Whether the empty location outlives that depends on ApplyDebugLocation's
  // destructor, which is not visible here - confirm this has the intended
  // effect (the same applies to the second use below).
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the call that runs \p OutlinedFn as a parallel region.
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insertion point.
/// \param Loc Location used for the runtime calls.
/// \param OutlinedFn The outlined function of the parallel region.
/// \param CapturedVars Values forwarded to \p OutlinedFn after the two
/// thread-id arguments.
/// \param IfCond Optional 'if' condition. When present, the region is forked
/// only if it evaluates to true and otherwise runs through the serialized
/// path; when null, the fork path is emitted unconditionally.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Fork path: hand the outlined function and the captured variables to
  // __kmpc_fork_call.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: call the outlined function directly, bracketed by
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
2188 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2189 SourceLocation Loc) { 2190 if (auto *OMPRegionInfo = 2191 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2192 if (OMPRegionInfo->getThreadIDVariable()) 2193 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2194 2195 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2196 QualType Int32Ty = 2197 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2198 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2199 CGF.EmitStoreOfScalar(ThreadID, 2200 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2201 2202 return ThreadIDTemp; 2203 } 2204 2205 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2206 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2207 SmallString<256> Buffer; 2208 llvm::raw_svector_ostream Out(Buffer); 2209 Out << Name; 2210 StringRef RuntimeName = Out.str(); 2211 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2212 if (Elem.second) { 2213 assert(Elem.second->getType()->getPointerElementType() == Ty && 2214 "OMP internal variable has different type than requested"); 2215 return &*Elem.second; 2216 } 2217 2218 return Elem.second = new llvm::GlobalVariable( 2219 CGM.getModule(), Ty, /*IsConstant*/ false, 2220 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2221 Elem.first(), /*InsertBefore=*/nullptr, 2222 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2223 } 2224 2225 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2226 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2227 std::string Name = getName({Prefix, "var"}); 2228 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2229 } 2230 2231 namespace { 2232 /// Common pre(post)-action for different OpenMP constructs. 
2233 class CommonActionTy final : public PrePostActionTy { 2234 llvm::FunctionCallee EnterCallee; 2235 ArrayRef<llvm::Value *> EnterArgs; 2236 llvm::FunctionCallee ExitCallee; 2237 ArrayRef<llvm::Value *> ExitArgs; 2238 bool Conditional; 2239 llvm::BasicBlock *ContBlock = nullptr; 2240 2241 public: 2242 CommonActionTy(llvm::FunctionCallee EnterCallee, 2243 ArrayRef<llvm::Value *> EnterArgs, 2244 llvm::FunctionCallee ExitCallee, 2245 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2246 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2247 ExitArgs(ExitArgs), Conditional(Conditional) {} 2248 void Enter(CodeGenFunction &CGF) override { 2249 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2250 if (Conditional) { 2251 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2252 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2253 ContBlock = CGF.createBasicBlock("omp_if.end"); 2254 // Generate the branch (If-stmt) 2255 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2256 CGF.EmitBlock(ThenBlock); 2257 } 2258 } 2259 void Done(CodeGenFunction &CGF) { 2260 // Emit the rest of blocks/branches 2261 CGF.EmitBranch(ContBlock); 2262 CGF.EmitBlock(ContBlock, true); 2263 } 2264 void Exit(CodeGenFunction &CGF) override { 2265 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2266 } 2267 }; 2268 } // anonymous namespace 2269 2270 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2271 StringRef CriticalName, 2272 const RegionCodeGenTy &CriticalOpGen, 2273 SourceLocation Loc, const Expr *Hint) { 2274 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2275 // CriticalOpGen(); 2276 // __kmpc_end_critical(ident_t *, gtid, Lock); 2277 // Prepare arguments and build a call to __kmpc_critical 2278 if (!CGF.HaveInsertPoint()) 2279 return; 2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2281 getCriticalRegionLock(CriticalName)}; 2282 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  // The enter call takes the hint as an extra trailing argument, converted to
  // a 32-bit integer; the exit call always uses the three base arguments.
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body produced by \p MasterOpGen is guarded by
/// the result of __kmpc_master and followed by __kmpc_end_master.
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insertion point.
/// \param MasterOpGen Code generator for the region body.
/// \param Loc Location used for the runtime calls.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // The action is conditional, so the guard it opened has to be closed
  // explicitly once the region has been emitted.
  Action.Done(CGF);
}

/// Emit a 'taskyield': through the OpenMPIRBuilder when that option is
/// enabled, otherwise as a direct __kmpc_omp_taskyield runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // When emitting inside an OpenMP region, let the region info emit its
  // untied-task switch after the yield.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: the body produced by \p TaskgroupOpGen is
/// bracketed by __kmpc_taskgroup and __kmpc_end_taskgroup calls.
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insertion point.
/// \param TaskgroupOpGen Code generator for the region body.
/// \param Loc Location used for the runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Both the enter and the exit call receive the same (loc, gtid) arguments.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
/// \param CGF Function in which the code is emitted.
/// \param Array Address of the array of pointers.
/// \param Index Position of the wanted pointer within \p Array.
/// \param Var Declaration that supplies the alignment and the type of the
/// resulting address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
2368 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2369 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2370 2371 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2372 Addr = CGF.Builder.CreateElementBitCast( 2373 Addr, CGF.ConvertTypeForMem(Var->getType())); 2374 return Addr; 2375 } 2376 2377 static llvm::Value *emitCopyprivateCopyFunction( 2378 CodeGenModule &CGM, llvm::Type *ArgsType, 2379 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2380 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2381 SourceLocation Loc) { 2382 ASTContext &C = CGM.getContext(); 2383 // void copy_func(void *LHSArg, void *RHSArg); 2384 FunctionArgList Args; 2385 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2386 ImplicitParamDecl::Other); 2387 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 Args.push_back(&LHSArg); 2390 Args.push_back(&RHSArg); 2391 const auto &CGFI = 2392 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2393 std::string Name = 2394 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2396 llvm::GlobalValue::InternalLinkage, Name, 2397 &CGM.getModule()); 2398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2399 Fn->setDoesNotRecurse(); 2400 CodeGenFunction CGF(CGM); 2401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2402 // Dest = (void*[n])(LHSArg); 2403 // Src = (void*[n])(RHSArg); 2404 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2405 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2406 ArgsType), CGF.getPointerAlign()); 2407 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2409 ArgsType), CGF.getPointerAlign()); 2410 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 2411 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2412 // ... 2413 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2414 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2415 const auto *DestVar = 2416 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2417 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2418 2419 const auto *SrcVar = 2420 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2421 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2422 2423 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2424 QualType Type = VD->getType(); 2425 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2426 } 2427 CGF.FinishFunction(); 2428 return Fn; 2429 } 2430 2431 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2432 const RegionCodeGenTy &SingleOpGen, 2433 SourceLocation Loc, 2434 ArrayRef<const Expr *> CopyprivateVars, 2435 ArrayRef<const Expr *> SrcExprs, 2436 ArrayRef<const Expr *> DstExprs, 2437 ArrayRef<const Expr *> AssignmentOps) { 2438 if (!CGF.HaveInsertPoint()) 2439 return; 2440 assert(CopyprivateVars.size() == SrcExprs.size() && 2441 CopyprivateVars.size() == DstExprs.size() && 2442 CopyprivateVars.size() == AssignmentOps.size()); 2443 ASTContext &C = CGM.getContext(); 2444 // int32 did_it = 0; 2445 // if(__kmpc_single(ident_t *, gtid)) { 2446 // SingleOpGen(); 2447 // __kmpc_end_single(ident_t *, gtid); 2448 // did_it = 1; 2449 // } 2450 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2451 // <copy_func>, did_it); 2452 2453 Address DidIt = Address::invalid(); 2454 if (!CopyprivateVars.empty()) { 2455 // int32 did_it = 0; 2456 QualType KmpInt32Ty = 2457 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2458 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2459 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2460 } 2461 // Prepare arguments and build a call to __kmpc_single 2462 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2463 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2464 CGM.getModule(), OMPRTL___kmpc_single), 2465 Args, 2466 OMPBuilder.getOrCreateRuntimeFunction( 2467 CGM.getModule(), OMPRTL___kmpc_end_single), 2468 Args, 2469 /*Conditional=*/true); 2470 SingleOpGen.setAction(Action); 2471 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2472 if (DidIt.isValid()) { 2473 // did_it = 1; 2474 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2475 } 2476 Action.Done(CGF); 2477 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2478 // <copy_func>, did_it); 2479 if (DidIt.isValid()) { 2480 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2481 QualType CopyprivateArrayTy = C.getConstantArrayType( 2482 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2483 /*IndexTypeQuals=*/0); 2484 // Create a list of all private variables for copyprivate. 2485 Address CopyprivateList = 2486 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2487 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2488 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2489 CGF.Builder.CreateStore( 2490 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2491 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2492 CGF.VoidPtrTy), 2493 Elem); 2494 } 2495 // Build function that copies private values from single region to all other 2496 // threads in the corresponding parallel region. 
2497 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2498 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2499 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2500 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2501 Address CL = 2502 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2503 CGF.VoidPtrTy); 2504 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2505 llvm::Value *Args[] = { 2506 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2507 getThreadID(CGF, Loc), // i32 <gtid> 2508 BufSize, // size_t <buf_size> 2509 CL.getPointer(), // void *<copyprivate list> 2510 CpyFn, // void (*) (void *, void *) <copy_func> 2511 DidItVal // i32 did_it 2512 }; 2513 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2514 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2515 Args); 2516 } 2517 } 2518 2519 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2520 const RegionCodeGenTy &OrderedOpGen, 2521 SourceLocation Loc, bool IsThreads) { 2522 if (!CGF.HaveInsertPoint()) 2523 return; 2524 // __kmpc_ordered(ident_t *, gtid); 2525 // OrderedOpGen(); 2526 // __kmpc_end_ordered(ident_t *, gtid); 2527 // Prepare arguments and build a call to __kmpc_ordered 2528 if (IsThreads) { 2529 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2530 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2531 CGM.getModule(), OMPRTL___kmpc_ordered), 2532 Args, 2533 OMPBuilder.getOrCreateRuntimeFunction( 2534 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2535 Args); 2536 OrderedOpGen.setAction(Action); 2537 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2538 return; 2539 } 2540 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2541 } 2542 2543 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2544 unsigned Flags; 2545 if (Kind == OMPD_for) 2546 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2547 else if (Kind == OMPD_sections) 2548 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2549 else if (Kind == OMPD_single) 2550 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2551 else if (Kind == OMPD_barrier) 2552 Flags = OMP_IDENT_BARRIER_EXPL; 2553 else 2554 Flags = OMP_IDENT_BARRIER_IMPL; 2555 return Flags; 2556 } 2557 2558 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2559 CodeGenFunction &CGF, const OMPLoopDirective &S, 2560 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2561 // Check if the loop directive is actually a doacross loop directive. In this 2562 // case choose static, 1 schedule. 2563 if (llvm::any_of( 2564 S.getClausesOfKind<OMPOrderedClause>(), 2565 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2566 ScheduleKind = OMPC_SCHEDULE_static; 2567 // Chunk size is 1 in this case. 2568 llvm::APInt ChunkSize(32, 1); 2569 ChunkExpr = IntegerLiteral::Create( 2570 CGF.getContext(), ChunkSize, 2571 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2572 SourceLocation()); 2573 } 2574 } 2575 2576 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2577 OpenMPDirectiveKind Kind, bool EmitChecks, 2578 bool ForceSimpleCall) { 2579 // Check if we should use the OMPBuilder 2580 auto *OMPRegionInfo = 2581 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2582 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2583 CGF.Builder.restoreIP(OMPBuilder.CreateBarrier( 2584 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2585 return; 2586 } 2587 2588 if (!CGF.HaveInsertPoint()) 2589 return; 2590 // Build call __kmpc_cancel_barrier(loc, thread_id); 2591 // Build call __kmpc_barrier(loc, thread_id); 2592 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2593 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2594 // thread_id); 2595 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2596 getThreadID(CGF, Loc)}; 2597 if (OMPRegionInfo) { 2598 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2599 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2600 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2601 OMPRTL___kmpc_cancel_barrier), 2602 Args); 2603 if (EmitChecks) { 2604 // if (__kmpc_cancel_barrier()) { 2605 // exit from construct; 2606 // } 2607 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2608 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2609 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2610 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2611 CGF.EmitBlock(ExitBB); 2612 // exit from construct; 2613 CodeGenFunction::JumpDest CancelDestination = 2614 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2615 CGF.EmitBranchThroughCleanup(CancelDestination); 2616 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2617 } 2618 return; 2619 } 2620 } 2621 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2622 CGM.getModule(), OMPRTL___kmpc_barrier), 2623 Args); 2624 } 2625 2626 /// Map the OpenMP loop schedule to the runtime enumeration. 2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2628 bool Chunked, bool Ordered) { 2629 switch (ScheduleKind) { 2630 case OMPC_SCHEDULE_static: 2631 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2632 : (Ordered ? OMP_ord_static : OMP_sch_static); 2633 case OMPC_SCHEDULE_dynamic: 2634 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2635 case OMPC_SCHEDULE_guided: 2636 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2637 case OMPC_SCHEDULE_runtime: 2638 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2639 case OMPC_SCHEDULE_auto: 2640 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2641 case OMPC_SCHEDULE_unknown: 2642 assert(!Chunked && "chunk was specified but schedule kind not known"); 2643 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2644 } 2645 llvm_unreachable("Unexpected runtime schedule"); 2646 } 2647 2648 /// Map the OpenMP distribute schedule to the runtime enumeration. 2649 static OpenMPSchedType 2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2651 // only static is allowed for dist_schedule 2652 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2653 } 2654 2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2656 bool Chunked) const { 2657 OpenMPSchedType Schedule = 2658 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2659 return Schedule == OMP_sch_static; 2660 } 2661 2662 bool CGOpenMPRuntime::isStaticNonchunked( 2663 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2664 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2665 return Schedule == OMP_dist_sch_static; 2666 } 2667 2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2669 bool Chunked) const { 2670 OpenMPSchedType Schedule = 2671 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2672 return Schedule == OMP_sch_static_chunked; 2673 } 2674 2675 bool CGOpenMPRuntime::isStaticChunked( 2676 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2677 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2678 return Schedule == OMP_dist_sch_static_chunked; 2679 } 2680 2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2682 OpenMPSchedType Schedule = 2683 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2684 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2685 return Schedule != OMP_sch_static; 2686 } 2687 2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2689 OpenMPScheduleClauseModifier M1, 2690 OpenMPScheduleClauseModifier M2) { 2691 int Modifier = 0; 2692 switch (M1) { 2693 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2694 Modifier = OMP_sch_modifier_monotonic; 2695 break; 2696 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2697 Modifier = OMP_sch_modifier_nonmonotonic; 2698 break; 2699 case OMPC_SCHEDULE_MODIFIER_simd: 2700 if (Schedule == OMP_sch_static_chunked) 2701 Schedule = OMP_sch_static_balanced_chunked; 2702 break; 2703 case OMPC_SCHEDULE_MODIFIER_last: 2704 case OMPC_SCHEDULE_MODIFIER_unknown: 2705 break; 2706 } 2707 switch (M2) { 2708 case OMPC_SCHEDULE_MODIFIER_monotonic: 2709 Modifier = OMP_sch_modifier_monotonic; 2710 break; 2711 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2712 Modifier = OMP_sch_modifier_nonmonotonic; 2713 break; 2714 case OMPC_SCHEDULE_MODIFIER_simd: 2715 if (Schedule == OMP_sch_static_chunked) 2716 Schedule = OMP_sch_static_balanced_chunked; 2717 break; 2718 case OMPC_SCHEDULE_MODIFIER_last: 2719 case OMPC_SCHEDULE_MODIFIER_unknown: 2720 break; 2721 } 2722 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2723 // If the static schedule kind is specified or if the ordered clause is 2724 // specified, and if the nonmonotonic modifier is not specified, the effect is 2725 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2726 // modifier is specified, the effect is as if the nonmonotonic modifier is 2727 // specified. 
2728 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2729 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2730 Schedule == OMP_sch_static_balanced_chunked || 2731 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2732 Schedule == OMP_dist_sch_static_chunked || 2733 Schedule == OMP_dist_sch_static)) 2734 Modifier = OMP_sch_modifier_nonmonotonic; 2735 } 2736 return Schedule | Modifier; 2737 } 2738 2739 void CGOpenMPRuntime::emitForDispatchInit( 2740 CodeGenFunction &CGF, SourceLocation Loc, 2741 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2742 bool Ordered, const DispatchRTInput &DispatchValues) { 2743 if (!CGF.HaveInsertPoint()) 2744 return; 2745 OpenMPSchedType Schedule = getRuntimeSchedule( 2746 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2747 assert(Ordered || 2748 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2749 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2750 Schedule != OMP_sch_static_balanced_chunked)); 2751 // Call __kmpc_dispatch_init( 2752 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2753 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2754 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2755 2756 // If the Chunk was not specified in the clause - use default value 1. 2757 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2758 : CGF.Builder.getIntN(IVSize, 1); 2759 llvm::Value *Args[] = { 2760 emitUpdateLocation(CGF, Loc), 2761 getThreadID(CGF, Loc), 2762 CGF.Builder.getInt32(addMonoNonMonoModifier( 2763 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2764 DispatchValues.LB, // Lower 2765 DispatchValues.UB, // Upper 2766 CGF.Builder.getIntN(IVSize, 1), // Stride 2767 Chunk // Chunk 2768 }; 2769 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2770 } 2771 2772 static void emitForStaticInitCall( 2773 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2774 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2775 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2776 const CGOpenMPRuntime::StaticRTInput &Values) { 2777 if (!CGF.HaveInsertPoint()) 2778 return; 2779 2780 assert(!Values.Ordered); 2781 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2782 Schedule == OMP_sch_static_balanced_chunked || 2783 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2784 Schedule == OMP_dist_sch_static || 2785 Schedule == OMP_dist_sch_static_chunked); 2786 2787 // Call __kmpc_for_static_init( 2788 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2789 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2790 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2791 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2792 llvm::Value *Chunk = Values.Chunk; 2793 if (Chunk == nullptr) { 2794 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2795 Schedule == OMP_dist_sch_static) && 2796 "expected static non-chunked schedule"); 2797 // If the Chunk was not specified in the clause - use default value 1. 
2798 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2799 } else { 2800 assert((Schedule == OMP_sch_static_chunked || 2801 Schedule == OMP_sch_static_balanced_chunked || 2802 Schedule == OMP_ord_static_chunked || 2803 Schedule == OMP_dist_sch_static_chunked) && 2804 "expected static chunked schedule"); 2805 } 2806 llvm::Value *Args[] = { 2807 UpdateLocation, 2808 ThreadId, 2809 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2810 M2)), // Schedule type 2811 Values.IL.getPointer(), // &isLastIter 2812 Values.LB.getPointer(), // &LB 2813 Values.UB.getPointer(), // &UB 2814 Values.ST.getPointer(), // &Stride 2815 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2816 Chunk // Chunk 2817 }; 2818 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2819 } 2820 2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2822 SourceLocation Loc, 2823 OpenMPDirectiveKind DKind, 2824 const OpenMPScheduleTy &ScheduleKind, 2825 const StaticRTInput &Values) { 2826 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2827 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2828 assert(isOpenMPWorksharingDirective(DKind) && 2829 "Expected loop-based or sections-based directive."); 2830 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2831 isOpenMPLoopDirective(DKind) 2832 ? 
OMP_IDENT_WORK_LOOP 2833 : OMP_IDENT_WORK_SECTIONS); 2834 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2835 llvm::FunctionCallee StaticInitFunction = 2836 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2837 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2838 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2839 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2840 } 2841 2842 void CGOpenMPRuntime::emitDistributeStaticInit( 2843 CodeGenFunction &CGF, SourceLocation Loc, 2844 OpenMPDistScheduleClauseKind SchedKind, 2845 const CGOpenMPRuntime::StaticRTInput &Values) { 2846 OpenMPSchedType ScheduleNum = 2847 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2848 llvm::Value *UpdatedLocation = 2849 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2850 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2851 llvm::FunctionCallee StaticInitFunction = 2852 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2853 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2854 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2855 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2856 } 2857 2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2859 SourceLocation Loc, 2860 OpenMPDirectiveKind DKind) { 2861 if (!CGF.HaveInsertPoint()) 2862 return; 2863 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2864 llvm::Value *Args[] = { 2865 emitUpdateLocation(CGF, Loc, 2866 isOpenMPDistributeDirective(DKind) 2867 ? OMP_IDENT_WORK_DISTRIBUTE 2868 : isOpenMPLoopDirective(DKind) 2869 ? 
OMP_IDENT_WORK_LOOP 2870 : OMP_IDENT_WORK_SECTIONS), 2871 getThreadID(CGF, Loc)}; 2872 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2873 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2874 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2875 Args); 2876 } 2877 2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2879 SourceLocation Loc, 2880 unsigned IVSize, 2881 bool IVSigned) { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2885 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2886 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2887 } 2888 2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2890 SourceLocation Loc, unsigned IVSize, 2891 bool IVSigned, Address IL, 2892 Address LB, Address UB, 2893 Address ST) { 2894 // Call __kmpc_dispatch_next( 2895 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2896 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2897 // kmp_int[32|64] *p_stride); 2898 llvm::Value *Args[] = { 2899 emitUpdateLocation(CGF, Loc), 2900 getThreadID(CGF, Loc), 2901 IL.getPointer(), // &isLastIter 2902 LB.getPointer(), // &Lower 2903 UB.getPointer(), // &Upper 2904 ST.getPointer() // &Stride 2905 }; 2906 llvm::Value *Call = 2907 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2908 return CGF.EmitScalarConversion( 2909 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2910 CGF.getContext().BoolTy, Loc); 2911 } 2912 2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2914 llvm::Value *NumThreads, 2915 SourceLocation Loc) { 2916 if (!CGF.HaveInsertPoint()) 2917 return; 2918 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2919 llvm::Value *Args[] = { 2920 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2921 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2922 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2923 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2924 Args); 2925 } 2926 2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2928 ProcBindKind ProcBind, 2929 SourceLocation Loc) { 2930 if (!CGF.HaveInsertPoint()) 2931 return; 2932 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2933 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2934 llvm::Value *Args[] = { 2935 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2936 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2937 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2938 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2939 Args); 2940 } 2941 2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2943 SourceLocation Loc, llvm::AtomicOrdering AO) { 2944 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2945 OMPBuilder.CreateFlush(CGF.Builder); 2946 } else { 2947 if (!CGF.HaveInsertPoint()) 2948 return; 2949 // Build call void __kmpc_flush(ident_t *loc) 2950 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2951 CGM.getModule(), OMPRTL___kmpc_flush), 2952 emitUpdateLocation(CGF, Loc)); 2953 } 2954 } 2955 2956 namespace { 2957 /// Indexes of fields for type kmp_task_t. 2958 enum KmpTaskTFields { 2959 /// List of shared variables. 2960 KmpTaskTShareds, 2961 /// Task routine. 2962 KmpTaskTRoutine, 2963 /// Partition id for the untied tasks. 2964 KmpTaskTPartId, 2965 /// Function with call of destructors for private variables. 2966 Data1, 2967 /// Task priority. 2968 Data2, 2969 /// (Taskloops only) Lower bound. 2970 KmpTaskTLowerBound, 2971 /// (Taskloops only) Upper bound. 2972 KmpTaskTUpperBound, 2973 /// (Taskloops only) Stride. 2974 KmpTaskTStride, 2975 /// (Taskloops only) Is last iteration flag. 2976 KmpTaskTLastIter, 2977 /// (Taskloops only) Reduction data. 
2978 KmpTaskTReductions, 2979 }; 2980 } // anonymous namespace 2981 2982 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2983 return OffloadEntriesTargetRegion.empty() && 2984 OffloadEntriesDeviceGlobalVar.empty(); 2985 } 2986 2987 /// Initialize target region entry. 2988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2989 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2990 StringRef ParentName, unsigned LineNum, 2991 unsigned Order) { 2992 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2993 "only required for the device " 2994 "code generation."); 2995 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2996 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2997 OMPTargetRegionEntryTargetRegion); 2998 ++OffloadingEntriesNum; 2999 } 3000 3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3002 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3003 StringRef ParentName, unsigned LineNum, 3004 llvm::Constant *Addr, llvm::Constant *ID, 3005 OMPTargetRegionEntryKind Flags) { 3006 // If we are emitting code for a target, the entry is already initialized, 3007 // only has to be registered. 
3008 if (CGM.getLangOpts().OpenMPIsDevice) { 3009 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3010 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3011 DiagnosticsEngine::Error, 3012 "Unable to find target region on line '%0' in the device code."); 3013 CGM.getDiags().Report(DiagID) << LineNum; 3014 return; 3015 } 3016 auto &Entry = 3017 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3018 assert(Entry.isValid() && "Entry not initialized!"); 3019 Entry.setAddress(Addr); 3020 Entry.setID(ID); 3021 Entry.setFlags(Flags); 3022 } else { 3023 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3024 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3025 ++OffloadingEntriesNum; 3026 } 3027 } 3028 3029 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3030 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3031 unsigned LineNum) const { 3032 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3033 if (PerDevice == OffloadEntriesTargetRegion.end()) 3034 return false; 3035 auto PerFile = PerDevice->second.find(FileID); 3036 if (PerFile == PerDevice->second.end()) 3037 return false; 3038 auto PerParentName = PerFile->second.find(ParentName); 3039 if (PerParentName == PerFile->second.end()) 3040 return false; 3041 auto PerLine = PerParentName->second.find(LineNum); 3042 if (PerLine == PerParentName->second.end()) 3043 return false; 3044 // Fail if this entry is already registered. 3045 if (PerLine->second.getAddress() || PerLine->second.getID()) 3046 return false; 3047 return true; 3048 } 3049 3050 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3051 const OffloadTargetRegionEntryInfoActTy &Action) { 3052 // Scan all target region entries and perform the provided action. 
3053 for (const auto &D : OffloadEntriesTargetRegion) 3054 for (const auto &F : D.second) 3055 for (const auto &P : F.second) 3056 for (const auto &L : P.second) 3057 Action(D.first, F.first, P.first(), L.first, L.second); 3058 } 3059 3060 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3061 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3062 OMPTargetGlobalVarEntryKind Flags, 3063 unsigned Order) { 3064 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3065 "only required for the device " 3066 "code generation."); 3067 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3068 ++OffloadingEntriesNum; 3069 } 3070 3071 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3072 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3073 CharUnits VarSize, 3074 OMPTargetGlobalVarEntryKind Flags, 3075 llvm::GlobalValue::LinkageTypes Linkage) { 3076 if (CGM.getLangOpts().OpenMPIsDevice) { 3077 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3078 assert(Entry.isValid() && Entry.getFlags() == Flags && 3079 "Entry not initialized!"); 3080 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3081 "Resetting with the new address."); 3082 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3083 if (Entry.getVarSize().isZero()) { 3084 Entry.setVarSize(VarSize); 3085 Entry.setLinkage(Linkage); 3086 } 3087 return; 3088 } 3089 Entry.setVarSize(VarSize); 3090 Entry.setLinkage(Linkage); 3091 Entry.setAddress(Addr); 3092 } else { 3093 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3094 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3095 assert(Entry.isValid() && Entry.getFlags() == Flags && 3096 "Entry not initialized!"); 3097 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3098 "Resetting with the new address."); 3099 if (Entry.getVarSize().isZero()) { 3100 Entry.setVarSize(VarSize); 3101 Entry.setLinkage(Linkage); 3102 } 3103 return; 3104 } 3105 
OffloadEntriesDeviceGlobalVar.try_emplace( 3106 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3107 ++OffloadingEntriesNum; 3108 } 3109 } 3110 3111 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3112 actOnDeviceGlobalVarEntriesInfo( 3113 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3114 // Scan all target region entries and perform the provided action. 3115 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3116 Action(E.getKey(), E.getValue()); 3117 } 3118 3119 void CGOpenMPRuntime::createOffloadEntry( 3120 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3121 llvm::GlobalValue::LinkageTypes Linkage) { 3122 StringRef Name = Addr->getName(); 3123 llvm::Module &M = CGM.getModule(); 3124 llvm::LLVMContext &C = M.getContext(); 3125 3126 // Create constant string with the name. 3127 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3128 3129 std::string StringName = getName({"omp_offloading", "entry_name"}); 3130 auto *Str = new llvm::GlobalVariable( 3131 M, StrPtrInit->getType(), /*isConstant=*/true, 3132 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3133 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3134 3135 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3136 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3137 llvm::ConstantInt::get(CGM.SizeTy, Size), 3138 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3139 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3140 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3141 llvm::GlobalVariable *Entry = createGlobalStruct( 3142 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3143 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3144 3145 // The entry has to be created in the section the linker expects it to be. 
3146 Entry->setSection("omp_offloading_entries"); 3147 } 3148 3149 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3150 // Emit the offloading entries and metadata so that the device codegen side 3151 // can easily figure out what to emit. The produced metadata looks like 3152 // this: 3153 // 3154 // !omp_offload.info = !{!1, ...} 3155 // 3156 // Right now we only generate metadata for function that contain target 3157 // regions. 3158 3159 // If we are in simd mode or there are no entries, we don't need to do 3160 // anything. 3161 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3162 return; 3163 3164 llvm::Module &M = CGM.getModule(); 3165 llvm::LLVMContext &C = M.getContext(); 3166 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3167 SourceLocation, StringRef>, 3168 16> 3169 OrderedEntries(OffloadEntriesInfoManager.size()); 3170 llvm::SmallVector<StringRef, 16> ParentFunctions( 3171 OffloadEntriesInfoManager.size()); 3172 3173 // Auxiliary methods to create metadata values and strings. 3174 auto &&GetMDInt = [this](unsigned V) { 3175 return llvm::ConstantAsMetadata::get( 3176 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3177 }; 3178 3179 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3180 3181 // Create the offloading info metadata node. 3182 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3183 3184 // Create function that emits metadata for each target region entry; 3185 auto &&TargetRegionMetadataEmitter = 3186 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3187 &GetMDString]( 3188 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3189 unsigned Line, 3190 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3191 // Generate metadata for target regions. Each entry of this metadata 3192 // contains: 3193 // - Entry 0 -> Kind of this type of metadata (0). 
3194 // - Entry 1 -> Device ID of the file where the entry was identified. 3195 // - Entry 2 -> File ID of the file where the entry was identified. 3196 // - Entry 3 -> Mangled name of the function where the entry was 3197 // identified. 3198 // - Entry 4 -> Line in the file where the entry was identified. 3199 // - Entry 5 -> Order the entry was created. 3200 // The first element of the metadata node is the kind. 3201 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3202 GetMDInt(FileID), GetMDString(ParentName), 3203 GetMDInt(Line), GetMDInt(E.getOrder())}; 3204 3205 SourceLocation Loc; 3206 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3207 E = CGM.getContext().getSourceManager().fileinfo_end(); 3208 I != E; ++I) { 3209 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3210 I->getFirst()->getUniqueID().getFile() == FileID) { 3211 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3212 I->getFirst(), Line, 1); 3213 break; 3214 } 3215 } 3216 // Save this entry in the right position of the ordered entries array. 3217 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3218 ParentFunctions[E.getOrder()] = ParentName; 3219 3220 // Add metadata to the named metadata node. 3221 MD->addOperand(llvm::MDNode::get(C, Ops)); 3222 }; 3223 3224 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3225 TargetRegionMetadataEmitter); 3226 3227 // Create function that emits metadata for each device global variable entry; 3228 auto &&DeviceGlobalVarMetadataEmitter = 3229 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3230 MD](StringRef MangledName, 3231 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3232 &E) { 3233 // Generate metadata for global variables. Each entry of this metadata 3234 // contains: 3235 // - Entry 0 -> Kind of this type of metadata (1). 3236 // - Entry 1 -> Mangled name of the variable. 3237 // - Entry 2 -> Declare target kind. 
3238 // - Entry 3 -> Order the entry was created. 3239 // The first element of the metadata node is the kind. 3240 llvm::Metadata *Ops[] = { 3241 GetMDInt(E.getKind()), GetMDString(MangledName), 3242 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3243 3244 // Save this entry in the right position of the ordered entries array. 3245 OrderedEntries[E.getOrder()] = 3246 std::make_tuple(&E, SourceLocation(), MangledName); 3247 3248 // Add metadata to the named metadata node. 3249 MD->addOperand(llvm::MDNode::get(C, Ops)); 3250 }; 3251 3252 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3253 DeviceGlobalVarMetadataEmitter); 3254 3255 for (const auto &E : OrderedEntries) { 3256 assert(std::get<0>(E) && "All ordered entries must exist!"); 3257 if (const auto *CE = 3258 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3259 std::get<0>(E))) { 3260 if (!CE->getID() || !CE->getAddress()) { 3261 // Do not blame the entry if the parent funtion is not emitted. 3262 StringRef FnName = ParentFunctions[CE->getOrder()]; 3263 if (!CGM.GetGlobalValue(FnName)) 3264 continue; 3265 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3266 DiagnosticsEngine::Error, 3267 "Offloading entry for target region in %0 is incorrect: either the " 3268 "address or the ID is invalid."); 3269 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3270 continue; 3271 } 3272 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3273 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3274 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3275 OffloadEntryInfoDeviceGlobalVar>( 3276 std::get<0>(E))) { 3277 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3278 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3279 CE->getFlags()); 3280 switch (Flags) { 3281 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3282 if (CGM.getLangOpts().OpenMPIsDevice && 3283 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3284 continue; 3285 if (!CE->getAddress()) { 3286 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3287 DiagnosticsEngine::Error, "Offloading entry for declare target " 3288 "variable %0 is incorrect: the " 3289 "address is invalid."); 3290 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3291 continue; 3292 } 3293 // The vaiable has no definition - no need to add the entry. 3294 if (CE->getVarSize().isZero()) 3295 continue; 3296 break; 3297 } 3298 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3299 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3300 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3301 "Declaret target link address is set."); 3302 if (CGM.getLangOpts().OpenMPIsDevice) 3303 continue; 3304 if (!CE->getAddress()) { 3305 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3306 DiagnosticsEngine::Error, 3307 "Offloading entry for declare target variable is incorrect: the " 3308 "address is invalid."); 3309 CGM.getDiags().Report(DiagID); 3310 continue; 3311 } 3312 break; 3313 } 3314 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3315 CE->getVarSize().getQuantity(), Flags, 3316 CE->getLinkage()); 3317 } else { 3318 llvm_unreachable("Unsupported entry kind."); 3319 } 3320 } 3321 } 3322 3323 /// Loads all the offload entries information from the host IR 3324 /// metadata. 3325 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3326 // If we are in target mode, load the metadata from the host IR. This code has 3327 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3328 3329 if (!CGM.getLangOpts().OpenMPIsDevice) 3330 return; 3331 3332 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3333 return; 3334 3335 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3336 if (auto EC = Buf.getError()) { 3337 CGM.getDiags().Report(diag::err_cannot_open_file) 3338 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3339 return; 3340 } 3341 3342 llvm::LLVMContext C; 3343 auto ME = expectedToErrorOrAndEmitErrors( 3344 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3345 3346 if (auto EC = ME.getError()) { 3347 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3348 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3349 CGM.getDiags().Report(DiagID) 3350 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3351 return; 3352 } 3353 3354 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3355 if (!MD) 3356 return; 3357 3358 for (llvm::MDNode *MN : MD->operands()) { 3359 auto &&GetMDInt = [MN](unsigned Idx) { 3360 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3361 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3362 }; 3363 3364 auto &&GetMDString = [MN](unsigned Idx) { 3365 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3366 return V->getString(); 3367 }; 3368 3369 switch (GetMDInt(0)) { 3370 default: 3371 llvm_unreachable("Unexpected metadata!"); 3372 break; 3373 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3374 OffloadingEntryInfoTargetRegion: 3375 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3376 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3377 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3378 /*Order=*/GetMDInt(5)); 3379 break; 3380 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3381 OffloadingEntryInfoDeviceGlobalVar: 3382 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3383 /*MangledName=*/GetMDString(1), 3384 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3385 /*Flags=*/GetMDInt(2)), 3386 /*Order=*/GetMDInt(3)); 3387 break; 3388 } 3389 } 3390 } 3391 3392 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3393 if (!KmpRoutineEntryPtrTy) { 3394 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3395 ASTContext &C = CGM.getContext(); 3396 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3397 FunctionProtoType::ExtProtoInfo EPI; 3398 KmpRoutineEntryPtrQTy = C.getPointerType( 3399 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3400 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3401 } 3402 } 3403 3404 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3405 // Make sure the type of the entry is already created. This is the type we 3406 // have to create: 3407 // struct __tgt_offload_entry{ 3408 // void *addr; // Pointer to the offload entry info. 3409 // // (function or global) 3410 // char *name; // Name of the function or global. 3411 // size_t size; // Size of the entry info (0 if it a function). 3412 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3413 // int32_t reserved; // Reserved, to use by the runtime library. 
3414 // }; 3415 if (TgtOffloadEntryQTy.isNull()) { 3416 ASTContext &C = CGM.getContext(); 3417 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3418 RD->startDefinition(); 3419 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3420 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3421 addFieldToRecordDecl(C, RD, C.getSizeType()); 3422 addFieldToRecordDecl( 3423 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3424 addFieldToRecordDecl( 3425 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3426 RD->completeDefinition(); 3427 RD->addAttr(PackedAttr::CreateImplicit(C)); 3428 TgtOffloadEntryQTy = C.getRecordType(RD); 3429 } 3430 return TgtOffloadEntryQTy; 3431 } 3432 3433 namespace { 3434 struct PrivateHelpersTy { 3435 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3436 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3437 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3438 PrivateElemInit(PrivateElemInit) {} 3439 const Expr *OriginalRef = nullptr; 3440 const VarDecl *Original = nullptr; 3441 const VarDecl *PrivateCopy = nullptr; 3442 const VarDecl *PrivateElemInit = nullptr; 3443 }; 3444 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3445 } // anonymous namespace 3446 3447 static RecordDecl * 3448 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3449 if (!Privates.empty()) { 3450 ASTContext &C = CGM.getContext(); 3451 // Build struct .kmp_privates_t. 
{ 3452 // /* private vars */ 3453 // }; 3454 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3455 RD->startDefinition(); 3456 for (const auto &Pair : Privates) { 3457 const VarDecl *VD = Pair.second.Original; 3458 QualType Type = VD->getType().getNonReferenceType(); 3459 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3460 if (VD->hasAttrs()) { 3461 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3462 E(VD->getAttrs().end()); 3463 I != E; ++I) 3464 FD->addAttr(*I); 3465 } 3466 } 3467 RD->completeDefinition(); 3468 return RD; 3469 } 3470 return nullptr; 3471 } 3472 3473 static RecordDecl * 3474 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3475 QualType KmpInt32Ty, 3476 QualType KmpRoutineEntryPointerQTy) { 3477 ASTContext &C = CGM.getContext(); 3478 // Build struct kmp_task_t { 3479 // void * shareds; 3480 // kmp_routine_entry_t routine; 3481 // kmp_int32 part_id; 3482 // kmp_cmplrdata_t data1; 3483 // kmp_cmplrdata_t data2; 3484 // For taskloops additional fields: 3485 // kmp_uint64 lb; 3486 // kmp_uint64 ub; 3487 // kmp_int64 st; 3488 // kmp_int32 liter; 3489 // void * reductions; 3490 // }; 3491 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3492 UD->startDefinition(); 3493 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3494 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3495 UD->completeDefinition(); 3496 QualType KmpCmplrdataTy = C.getRecordType(UD); 3497 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3498 RD->startDefinition(); 3499 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3500 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3501 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3502 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3503 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3504 if (isOpenMPTaskLoopDirective(Kind)) { 3505 QualType KmpUInt64Ty = 3506 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3507 QualType KmpInt64Ty = 3508 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3509 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3510 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3511 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3512 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3513 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3514 } 3515 RD->completeDefinition(); 3516 return RD; 3517 } 3518 3519 static RecordDecl * 3520 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3521 ArrayRef<PrivateDataTy> Privates) { 3522 ASTContext &C = CGM.getContext(); 3523 // Build struct kmp_task_t_with_privates { 3524 // kmp_task_t task_data; 3525 // .kmp_privates_t. privates; 3526 // }; 3527 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3528 RD->startDefinition(); 3529 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3530 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3531 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3532 RD->completeDefinition(); 3533 return RD; 3534 } 3535 3536 /// Emit a proxy function which accepts kmp_task_t as the second 3537 /// argument. 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
///
/// The function is created with internal linkage and marked as
/// non-recursing. When the task record has no privates field, a null pointer
/// is passed in place of `&tt->privates`.
/// \return The emitted proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two parameters of the proxy: the global thread id and the
  // restrict-qualified pointer to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field (tt->task_data).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the memory type of SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the second field of the record and is optional;
  // pass a null pointer when it is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments passed for every directive kind; the last one is the task
  // record pointer itself, cast to void *.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loaded values of lb, ub, st, liter and
    // reductions, in this order.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
/// Emit a function that destroys the private copies stored in a task record.
///
/// The emitted function has internal linkage, is marked as non-recursing and
/// takes (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt). A destroy
/// cleanup is pushed for every field of the privates record whose type
/// reports a destruction kind.
///
/// The second field of \p KmpTaskTWithPrivatesQTy (the privates record) is
/// accessed without checking that it exists.
/// \return The emitted destructor function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Declare the parameters: global thread id and pointer to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Base starts as *tt and is then narrowed to the privates field (the second
  // field of the record).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private whose type needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
*noalias privs, <ty1> 3704 /// **noalias priv1,..., <tyn> **noalias privn) { 3705 /// *priv1 = &.privates.priv1; 3706 /// ...; 3707 /// *privn = &.privates.privn; 3708 /// } 3709 /// \endcode 3710 static llvm::Value * 3711 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3712 ArrayRef<const Expr *> PrivateVars, 3713 ArrayRef<const Expr *> FirstprivateVars, 3714 ArrayRef<const Expr *> LastprivateVars, 3715 QualType PrivatesQTy, 3716 ArrayRef<PrivateDataTy> Privates) { 3717 ASTContext &C = CGM.getContext(); 3718 FunctionArgList Args; 3719 ImplicitParamDecl TaskPrivatesArg( 3720 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3721 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3722 ImplicitParamDecl::Other); 3723 Args.push_back(&TaskPrivatesArg); 3724 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3725 unsigned Counter = 1; 3726 for (const Expr *E : PrivateVars) { 3727 Args.push_back(ImplicitParamDecl::Create( 3728 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3729 C.getPointerType(C.getPointerType(E->getType())) 3730 .withConst() 3731 .withRestrict(), 3732 ImplicitParamDecl::Other)); 3733 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3734 PrivateVarsPos[VD] = Counter; 3735 ++Counter; 3736 } 3737 for (const Expr *E : FirstprivateVars) { 3738 Args.push_back(ImplicitParamDecl::Create( 3739 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3740 C.getPointerType(C.getPointerType(E->getType())) 3741 .withConst() 3742 .withRestrict(), 3743 ImplicitParamDecl::Other)); 3744 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3745 PrivateVarsPos[VD] = Counter; 3746 ++Counter; 3747 } 3748 for (const Expr *E : LastprivateVars) { 3749 Args.push_back(ImplicitParamDecl::Create( 3750 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3751 C.getPointerType(C.getPointerType(E->getType())) 3752 .withConst() 3753 .withRestrict(), 3754 ImplicitParamDecl::Other)); 3755 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3756 
PrivateVarsPos[VD] = Counter; 3757 ++Counter; 3758 } 3759 const auto &TaskPrivatesMapFnInfo = 3760 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3761 llvm::FunctionType *TaskPrivatesMapTy = 3762 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3763 std::string Name = 3764 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3765 auto *TaskPrivatesMap = llvm::Function::Create( 3766 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3767 &CGM.getModule()); 3768 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3769 TaskPrivatesMapFnInfo); 3770 if (CGM.getLangOpts().Optimize) { 3771 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3772 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3773 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3774 } 3775 CodeGenFunction CGF(CGM); 3776 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3777 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3778 3779 // *privi = &.privates.privi; 3780 LValue Base = CGF.EmitLoadOfPointerLValue( 3781 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3782 TaskPrivatesArg.getType()->castAs<PointerType>()); 3783 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3784 Counter = 0; 3785 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3786 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3787 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3788 LValue RefLVal = 3789 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3790 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3791 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3792 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3793 ++Counter; 3794 } 3795 CGF.FinishFunction(); 3796 return TaskPrivatesMap; 3797 } 3798 3799 /// Emit initialization for private variables in task-based directives. 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, accessed through \p TDBase)
/// and emits the initializer of each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the shareds record; it is only used
/// when copies have to be read from it (see the SrcBase computation below).
/// \param ForDup When true, only private copies whose initializer is a
/// non-trivial constructor call are initialized, and firstprivate sources are
/// read from the shareds record.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Select the captured statement that belongs to the task/taskloop region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating, only non-trivial constructor initializers are
    // emitted; otherwise every initializer is.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // The initializer refers to the original value through Elem, so the
        // lvalue of the original variable has to be computed first.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original from the shareds record and rebuild the lvalue
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted inside a block.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind Elem to the original value and emit the
          // initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the field of the next private even if nothing was emitted.
    ++FI;
  }
}
3891 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3892 CGF, &CapturesInfo); 3893 CGF.EmitAnyExprToMem(Init, DestElement, 3894 Init->getType().getQualifiers(), 3895 /*IsInitializer=*/false); 3896 }); 3897 } 3898 } else { 3899 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3900 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3901 return SharedRefLValue.getAddress(CGF); 3902 }); 3903 (void)InitScope.Privatize(); 3904 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3905 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3906 /*capturedByInit=*/false); 3907 } 3908 } else { 3909 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3910 } 3911 } 3912 ++FI; 3913 } 3914 } 3915 3916 /// Check if duplication function is required for taskloops. 3917 static bool checkInitIsRequired(CodeGenFunction &CGF, 3918 ArrayRef<PrivateDataTy> Privates) { 3919 bool InitRequired = false; 3920 for (const PrivateDataTy &Pair : Privates) { 3921 const VarDecl *VD = Pair.second.PrivateCopy; 3922 const Expr *Init = VD->getAnyInitializer(); 3923 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3924 !CGF.isTrivialInitializer(Init)); 3925 if (InitRequired) 3926 break; 3927 } 3928 return InitRequired; 3929 } 3930 3931 3932 /// Emit task_dup function (for initialization of 3933 /// private/firstprivate/lastprivate vars and last_iter flag) 3934 /// \code 3935 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3936 /// lastpriv) { 3937 /// // setup lastprivate flag 3938 /// task_dst->last = lastpriv; 3939 /// // could be constructor calls here... 
3940 /// } 3941 /// \endcode 3942 static llvm::Value * 3943 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3944 const OMPExecutableDirective &D, 3945 QualType KmpTaskTWithPrivatesPtrQTy, 3946 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3947 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3948 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3949 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3950 ASTContext &C = CGM.getContext(); 3951 FunctionArgList Args; 3952 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3953 KmpTaskTWithPrivatesPtrQTy, 3954 ImplicitParamDecl::Other); 3955 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3956 KmpTaskTWithPrivatesPtrQTy, 3957 ImplicitParamDecl::Other); 3958 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3959 ImplicitParamDecl::Other); 3960 Args.push_back(&DstArg); 3961 Args.push_back(&SrcArg); 3962 Args.push_back(&LastprivArg); 3963 const auto &TaskDupFnInfo = 3964 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3965 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3966 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3967 auto *TaskDup = llvm::Function::Create( 3968 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3969 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3970 TaskDup->setDoesNotRecurse(); 3971 CodeGenFunction CGF(CGM); 3972 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3973 Loc); 3974 3975 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3976 CGF.GetAddrOfLocalVar(&DstArg), 3977 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3978 // task_dst->liter = lastpriv; 3979 if (WithLastIter) { 3980 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3981 LValue Base = CGF.EmitLValueForField( 3982 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3983 LValue LILVal 
= CGF.EmitLValueForField(Base, *LIFI); 3984 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3985 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3986 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3987 } 3988 3989 // Emit initial values for private copies (if any). 3990 assert(!Privates.empty()); 3991 Address KmpTaskSharedsPtr = Address::invalid(); 3992 if (!Data.FirstprivateVars.empty()) { 3993 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3994 CGF.GetAddrOfLocalVar(&SrcArg), 3995 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3996 LValue Base = CGF.EmitLValueForField( 3997 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3998 KmpTaskSharedsPtr = Address( 3999 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4000 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4001 KmpTaskTShareds)), 4002 Loc), 4003 CGM.getNaturalTypeAlignment(SharedsTy)); 4004 } 4005 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4006 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4007 CGF.FinishFunction(); 4008 return TaskDup; 4009 } 4010 4011 /// Checks if destructor function is required to be generated. 4012 /// \return true if cleanups are required, false otherwise. 4013 static bool 4014 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4015 bool NeedsCleanup = false; 4016 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4017 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4018 for (const FieldDecl *FD : PrivateRD->fields()) { 4019 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4020 if (NeedsCleanup) 4021 break; 4022 } 4023 return NeedsCleanup; 4024 } 4025 4026 namespace { 4027 /// Loop generator for OpenMP iterator expression. 
/// Scope object that wraps the code emitted during its lifetime into one
/// nested loop per iterator of an OMPIteratorExpr. The constructor emits the
/// loop headers (outermost iterator first); the destructor emits the counter
/// increments, back-branches and exit blocks (innermost iterator first).
/// When constructed with a null expression neither of them emits anything.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations for the loop condition ("iter.cont") and
  // the loop exit ("iter.exit"); indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, before the iterator variables and
    // counters are privatized, and register a temporary for each of them.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open one loop per iterator; each subsequent loop is emitted inside the
    // body block of the previous one.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the type of the counter variable.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops in reverse order of their creation.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // IsFinished is set only for the exit block of the outermost iterator.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size of the object designated by \p E.
/// - For an array shaping expression the address is the value of the base
///   expression and the size is the size of the base's pointee type
///   multiplied by every dimension.
/// - For an array section the address is the section's lvalue pointer and
///   the size is the address difference between one element past the upper
///   bound and that pointer.
/// - Otherwise the address is the lvalue pointer of \p E and the size is the
///   size of its type.
/// \return the pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions are converted to size_t before being multiplied in.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    // Step one element past the upper bound so the difference below covers
    // the last element as well.
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// The record gets three unnamed fields, in order: an intptr-typed field, a
/// size_t field and a 32-bit unsigned flags field.
/// \param KmpTaskAffinityInfoTy [in, out] Cached record type; it is only
/// assigned when it is null on entry.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the allocation and initialization of the task descriptor for the
/// task-generating directive \p D: collects the private, firstprivate and
/// lastprivate variables, builds the task record types and the proxy task
/// entry, emits the runtime task allocation call, handles the 'detach' and
/// 'affinity' clauses, copies the shareds and initializes the private copies.
/// \return the emitted values and declarations collected in a TaskResultTy.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Private variables: no element initializer is recorded.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivate variables: copies and element initializers are walked in
  // lockstep with the variable list.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivate variables: no element initializer is recorded.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Order by decreasing alignment; the stable sort keeps the relative order
  // of entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives and all
  // other directives use separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map value is cast to (or created as null of) the type of
  // the outlined task function's fourth parameter.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value pointer, select the flag at run time;
  // otherwise use its integer part as a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used;
  // it takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler
    // expression and store it there.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses without an iterator modifier contribute a compile-time count
    // (NumAffinities); clauses with one contribute a runtime value.
    // NOTE(review): NumOfElements is the product of the iterator upper bounds
    // accumulated across all clauses with a modifier, and the varlist sizes
    // of those clauses are not factored in here - confirm this is the
    // intended element count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: the array is emitted as a variable-length array
      // whose size expression is an opaque value bound to NumOfElements.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced inside iterator loops are indexed through a runtime
    // counter that starts right after the statically placed elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Everything emitted while IteratorScope is alive ends up inside the
      // generated iterator loops.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task through the task-with-privates record type;
  // TDBase designates its first field.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is generated only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The union declaration obtained from the Data1 field above is reused to
    // address the member inside the Data2 field.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// The 'source', 'sink', 'depobj' and 'unknown' kinds have no runtime
/// counterpart here and are treated as unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \param KmpDependInfoTy [in, out] Cached record type; it is only assigned
/// when it is null on entry. The record gets three unnamed fields, in order:
/// an intptr-typed field, a size_t field and a field of the flags type.
/// \param FlagsTy [out] Always set to an unsigned integer type that has the
/// same bit width as bool.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the pointer stored in the depobj variable \p DepobjLVal and reads
/// the number of dependencies kept with it.
/// \return the pair (number of dependencies, lvalue of the first
/// kmp_depend_info element the depobj points to).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable is read as a pointer to void* and the loaded address
  // is then retyped as a pointer to kmp_depend_info.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element count is read from the base_addr field of the record located
  // one element before the first dependency.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info elements of \p DependenciesArray for every
/// expression in \p Data (base address, length and dependence kind flags).
/// \param Pos Position of the next element to fill: either a pointer to a
/// compile-time counter, which is incremented here, or a pointer to an lvalue
/// holding a runtime counter, for which the increment is emitted as code.
/// If \p Data has an iterator expression, the stores are emitted inside the
/// loops generated for it.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Address the element either with a constant index or with the index
    // loaded from the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: at compile time for the constant counter, in
    // emitted code for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a 'depobj' dependence clause, emits code that computes the number of
/// kmp_depend_info elements referenced by each depobj expression in \p Data.
/// Each expression gets its own temporary that is initialized with 0 and
/// increased by the element count inside the iterator loops (if any); the
/// temporaries are loaded after the loops are closed.
/// \return one loaded size value per depobj expression, in expression order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The nested block ends the iterator scope (closing the generated loops)
    // before the accumulated sizes are loaded below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the base_addr field of the record located
      // one element before the first dependency.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // size += NumDeps; the accumulator temporary starts at 0.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// For a 'depobj' dependence clause, emits code that copies the
/// kmp_depend_info elements referenced by each depobj expression in \p Data
/// into \p DependenciesArray, starting at the runtime position held in
/// \p PosLVal, and advances that position by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy dependency data: NumDeps elements of ElSize each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4812 }); 4813 QualType FlagsTy; 4814 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4815 bool HasDepobjDeps = false; 4816 bool HasRegularWithIterators = false; 4817 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4818 llvm::Value *NumOfRegularWithIterators = 4819 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4820 // Calculate number of depobj dependecies and regular deps with the iterators. 4821 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4822 if (D.DepKind == OMPC_DEPEND_depobj) { 4823 SmallVector<llvm::Value *, 4> Sizes = 4824 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4825 for (llvm::Value *Size : Sizes) { 4826 NumOfDepobjElements = 4827 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4828 } 4829 HasDepobjDeps = true; 4830 continue; 4831 } 4832 // Include number of iterations, if any. 4833 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4834 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4835 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4836 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4837 NumOfRegularWithIterators = 4838 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4839 } 4840 HasRegularWithIterators = true; 4841 continue; 4842 } 4843 } 4844 4845 QualType KmpDependInfoArrayTy; 4846 if (HasDepobjDeps || HasRegularWithIterators) { 4847 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4848 /*isSigned=*/false); 4849 if (HasDepobjDeps) { 4850 NumOfElements = 4851 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4852 } 4853 if (HasRegularWithIterators) { 4854 NumOfElements = 4855 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4856 } 4857 OpaqueValueExpr OVE(Loc, 4858 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4859 VK_RValue); 4860 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4861 RValue::get(NumOfElements)); 4862 
KmpDependInfoArrayTy = 4863 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4864 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4865 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4866 // Properly emit variable-sized array. 4867 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4868 ImplicitParamDecl::Other); 4869 CGF.EmitVarDecl(*PD); 4870 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4871 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4872 /*isSigned=*/false); 4873 } else { 4874 KmpDependInfoArrayTy = C.getConstantArrayType( 4875 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4876 ArrayType::Normal, /*IndexTypeQuals=*/0); 4877 DependenciesArray = 4878 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4879 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4880 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4881 /*isSigned=*/false); 4882 } 4883 unsigned Pos = 0; 4884 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4885 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4886 Dependencies[I].IteratorExpr) 4887 continue; 4888 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4889 DependenciesArray); 4890 } 4891 // Copy regular dependecies with iterators. 4892 LValue PosLVal = CGF.MakeAddrLValue( 4893 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4894 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4895 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4896 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4897 !Dependencies[I].IteratorExpr) 4898 continue; 4899 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4900 DependenciesArray); 4901 } 4902 // Copy final depobj arrays without iterators. 
4903 if (HasDepobjDeps) { 4904 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4905 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4906 continue; 4907 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4908 DependenciesArray); 4909 } 4910 } 4911 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4912 DependenciesArray, CGF.VoidPtrTy); 4913 return std::make_pair(NumOfElements, DependenciesArray); 4914 } 4915 4916 Address CGOpenMPRuntime::emitDepobjDependClause( 4917 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4918 SourceLocation Loc) { 4919 if (Dependencies.DepExprs.empty()) 4920 return Address::invalid(); 4921 // Process list of dependencies. 4922 ASTContext &C = CGM.getContext(); 4923 Address DependenciesArray = Address::invalid(); 4924 unsigned NumDependencies = Dependencies.DepExprs.size(); 4925 QualType FlagsTy; 4926 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4927 RecordDecl *KmpDependInfoRD = 4928 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4929 4930 llvm::Value *Size; 4931 // Define type kmp_depend_info[<Dependencies.size()>]; 4932 // For depobj reserve one extra element to store the number of elements. 4933 // It is required to handle depobj(x) update(in) construct. 
4934 // kmp_depend_info[<Dependencies.size()>] deps; 4935 llvm::Value *NumDepsVal; 4936 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4937 if (const auto *IE = 4938 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4939 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4940 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4941 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4942 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4943 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4944 } 4945 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4946 NumDepsVal); 4947 CharUnits SizeInBytes = 4948 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4949 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4950 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4951 NumDepsVal = 4952 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4953 } else { 4954 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4955 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4956 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4957 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4958 Size = CGM.getSize(Sz.alignTo(Align)); 4959 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4960 } 4961 // Need to allocate on the dynamic memory. 4962 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4963 // Use default allocator. 
4964 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4965 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4966 4967 llvm::Value *Addr = 4968 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4969 CGM.getModule(), OMPRTL___kmpc_alloc), 4970 Args, ".dep.arr.addr"); 4971 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4972 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4973 DependenciesArray = Address(Addr, Align); 4974 // Write number of elements in the first element of array for depobj. 4975 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4976 // deps[i].base_addr = NumDependencies; 4977 LValue BaseAddrLVal = CGF.EmitLValueForField( 4978 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4979 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4980 llvm::PointerUnion<unsigned *, LValue *> Pos; 4981 unsigned Idx = 1; 4982 LValue PosLVal; 4983 if (Dependencies.IteratorExpr) { 4984 PosLVal = CGF.MakeAddrLValue( 4985 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4986 C.getSizeType()); 4987 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4988 /*IsInit=*/true); 4989 Pos = &PosLVal; 4990 } else { 4991 Pos = &Idx; 4992 } 4993 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4994 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4995 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4996 return DependenciesArray; 4997 } 4998 4999 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5000 SourceLocation Loc) { 5001 ASTContext &C = CGM.getContext(); 5002 QualType FlagsTy; 5003 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5004 LValue Base = CGF.EmitLoadOfPointerLValue( 5005 DepobjLVal.getAddress(CGF), 5006 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5007 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5008 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5009 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5010 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5011 Addr.getPointer(), 5012 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5013 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5014 CGF.VoidPtrTy); 5015 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5016 // Use default allocator. 5017 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5018 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5019 5020 // _kmpc_free(gtid, addr, nullptr); 5021 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5022 CGM.getModule(), OMPRTL___kmpc_free), 5023 Args); 5024 } 5025 5026 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5027 OpenMPDependClauseKind NewDepKind, 5028 SourceLocation Loc) { 5029 ASTContext &C = CGM.getContext(); 5030 QualType FlagsTy; 5031 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5032 RecordDecl *KmpDependInfoRD = 5033 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5034 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5035 llvm::Value *NumDeps; 5036 LValue Base; 5037 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5038 5039 Address Begin = Base.getAddress(CGF); 5040 // Cast from pointer to array type to pointer to single element. 5041 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5042 // The basic structure here is a while-do loop. 
5043 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5044 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5045 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5046 CGF.EmitBlock(BodyBB); 5047 llvm::PHINode *ElementPHI = 5048 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5049 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5050 Begin = Address(ElementPHI, Begin.getAlignment()); 5051 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5052 Base.getTBAAInfo()); 5053 // deps[i].flags = NewDepKind; 5054 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5055 LValue FlagsLVal = CGF.EmitLValueForField( 5056 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5057 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5058 FlagsLVal); 5059 5060 // Shift the address forward by one element. 5061 Address ElementNext = 5062 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5063 ElementPHI->addIncoming(ElementNext.getPointer(), 5064 CGF.Builder.GetInsertBlock()); 5065 llvm::Value *IsEmpty = 5066 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5067 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5068 // Done. 
5069 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5070 } 5071 5072 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5073 const OMPExecutableDirective &D, 5074 llvm::Function *TaskFunction, 5075 QualType SharedsTy, Address Shareds, 5076 const Expr *IfCond, 5077 const OMPTaskDataTy &Data) { 5078 if (!CGF.HaveInsertPoint()) 5079 return; 5080 5081 TaskResultTy Result = 5082 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5083 llvm::Value *NewTask = Result.NewTask; 5084 llvm::Function *TaskEntry = Result.TaskEntry; 5085 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5086 LValue TDBase = Result.TDBase; 5087 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5088 // Process list of dependences. 5089 Address DependenciesArray = Address::invalid(); 5090 llvm::Value *NumOfElements; 5091 std::tie(NumOfElements, DependenciesArray) = 5092 emitDependClause(CGF, Data.Dependences, Loc); 5093 5094 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5095 // libcall. 
5096 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5097 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5098 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5099 // list is not empty 5100 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5101 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5102 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5103 llvm::Value *DepTaskArgs[7]; 5104 if (!Data.Dependences.empty()) { 5105 DepTaskArgs[0] = UpLoc; 5106 DepTaskArgs[1] = ThreadID; 5107 DepTaskArgs[2] = NewTask; 5108 DepTaskArgs[3] = NumOfElements; 5109 DepTaskArgs[4] = DependenciesArray.getPointer(); 5110 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5111 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5112 } 5113 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5114 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5115 if (!Data.Tied) { 5116 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5117 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5118 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5119 } 5120 if (!Data.Dependences.empty()) { 5121 CGF.EmitRuntimeCall( 5122 OMPBuilder.getOrCreateRuntimeFunction( 5123 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5124 DepTaskArgs); 5125 } else { 5126 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5127 CGM.getModule(), OMPRTL___kmpc_omp_task), 5128 TaskArgs); 5129 } 5130 // Check if parent region is untied and build return for untied task; 5131 if (auto *Region = 5132 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5133 Region->emitUntiedSwitch(CGF); 5134 }; 5135 5136 llvm::Value *DepWaitTaskArgs[6]; 5137 if (!Data.Dependences.empty()) { 5138 DepWaitTaskArgs[0] = UpLoc; 5139 DepWaitTaskArgs[1] = ThreadID; 5140 DepWaitTaskArgs[2] = NumOfElements; 5141 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5142 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5143 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5144 } 5145 auto &M = CGM.getModule(); 5146 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5147 TaskEntry, &Data, &DepWaitTaskArgs, 5148 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5149 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5150 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5151 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5152 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5153 // is specified. 5154 if (!Data.Dependences.empty()) 5155 CGF.EmitRuntimeCall( 5156 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5157 DepWaitTaskArgs); 5158 // Call proxy_task_entry(gtid, new_task); 5159 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5160 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5161 Action.Enter(CGF); 5162 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5163 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5164 OutlinedFnArgs); 5165 }; 5166 5167 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5168 // kmp_task_t *new_task); 5169 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5170 // kmp_task_t *new_task); 5171 RegionCodeGenTy RCG(CodeGen); 5172 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5173 M, OMPRTL___kmpc_omp_task_begin_if0), 5174 TaskArgs, 5175 OMPBuilder.getOrCreateRuntimeFunction( 5176 M, OMPRTL___kmpc_omp_task_complete_if0), 5177 TaskArgs); 5178 RCG.setAction(Action); 5179 RCG(CGF); 5180 }; 5181 5182 if (IfCond) { 5183 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5184 } else { 5185 RegionCodeGenTy ThenRCG(ThenCodeGen); 5186 ThenRCG(CGF); 5187 } 5188 } 5189 5190 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5191 const OMPLoopDirective &D, 5192 llvm::Function *TaskFunction, 5193 
QualType SharedsTy, Address Shareds, 5194 const Expr *IfCond, 5195 const OMPTaskDataTy &Data) { 5196 if (!CGF.HaveInsertPoint()) 5197 return; 5198 TaskResultTy Result = 5199 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5200 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5201 // libcall. 5202 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5203 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5204 // sched, kmp_uint64 grainsize, void *task_dup); 5205 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5206 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5207 llvm::Value *IfVal; 5208 if (IfCond) { 5209 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5210 /*isSigned=*/true); 5211 } else { 5212 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5213 } 5214 5215 LValue LBLVal = CGF.EmitLValueForField( 5216 Result.TDBase, 5217 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5218 const auto *LBVar = 5219 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5220 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5221 LBLVal.getQuals(), 5222 /*IsInitializer=*/true); 5223 LValue UBLVal = CGF.EmitLValueForField( 5224 Result.TDBase, 5225 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5226 const auto *UBVar = 5227 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5228 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5229 UBLVal.getQuals(), 5230 /*IsInitializer=*/true); 5231 LValue StLVal = CGF.EmitLValueForField( 5232 Result.TDBase, 5233 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5234 const auto *StVar = 5235 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5236 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5237 StLVal.getQuals(), 5238 /*IsInitializer=*/true); 5239 // 
Store reductions address. 5240 LValue RedLVal = CGF.EmitLValueForField( 5241 Result.TDBase, 5242 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5243 if (Data.Reductions) { 5244 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5245 } else { 5246 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5247 CGF.getContext().VoidPtrTy); 5248 } 5249 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5250 llvm::Value *TaskArgs[] = { 5251 UpLoc, 5252 ThreadID, 5253 Result.NewTask, 5254 IfVal, 5255 LBLVal.getPointer(CGF), 5256 UBLVal.getPointer(CGF), 5257 CGF.EmitLoadOfScalar(StLVal, Loc), 5258 llvm::ConstantInt::getSigned( 5259 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5260 llvm::ConstantInt::getSigned( 5261 CGF.IntTy, Data.Schedule.getPointer() 5262 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5263 : NoSchedule), 5264 Data.Schedule.getPointer() 5265 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5266 /*isSigned=*/false) 5267 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5268 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5269 Result.TaskDupFn, CGF.VoidPtrTy) 5270 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5271 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5272 CGM.getModule(), OMPRTL___kmpc_taskloop), 5273 TaskArgs); 5274 } 5275 5276 /// Emit reduction operation for each element of array (required for 5277 /// array sections) LHS op = RHS. 5278 /// \param Type Type of array. 5279 /// \param LHSVar Variable on the left side of the reduction operation 5280 /// (references element of array in original variable). 5281 /// \param RHSVar Variable on the right side of the reduction operation 5282 /// (references element of array in original variable). 5283 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5284 /// RHSVar. 
5285 static void EmitOMPAggregateReduction( 5286 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5287 const VarDecl *RHSVar, 5288 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5289 const Expr *, const Expr *)> &RedOpGen, 5290 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5291 const Expr *UpExpr = nullptr) { 5292 // Perform element-by-element initialization. 5293 QualType ElementTy; 5294 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5295 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5296 5297 // Drill down to the base element type on both arrays. 5298 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5299 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5300 5301 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5302 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5303 // Cast from pointer to array type to pointer to single element. 5304 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5305 // The basic structure here is a while-do loop. 5306 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5307 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5308 llvm::Value *IsEmpty = 5309 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5310 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5311 5312 // Enter the loop body, making that address the current address. 
5313 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5314 CGF.EmitBlock(BodyBB); 5315 5316 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5317 5318 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5319 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5320 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5321 Address RHSElementCurrent = 5322 Address(RHSElementPHI, 5323 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5324 5325 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5326 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5327 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5328 Address LHSElementCurrent = 5329 Address(LHSElementPHI, 5330 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5331 5332 // Emit copy. 5333 CodeGenFunction::OMPPrivateScope Scope(CGF); 5334 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5335 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5336 Scope.Privatize(); 5337 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5338 Scope.ForceCleanup(); 5339 5340 // Shift the address forward by one element. 5341 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5342 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5343 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5344 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5345 // Check whether we've reached the end. 5346 llvm::Value *Done = 5347 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5348 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5349 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5350 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5351 5352 // Done. 5353 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5354 } 5355 5356 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5357 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5358 /// UDR combiner function. 5359 static void emitReductionCombiner(CodeGenFunction &CGF, 5360 const Expr *ReductionOp) { 5361 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5362 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5363 if (const auto *DRE = 5364 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5365 if (const auto *DRD = 5366 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5367 std::pair<llvm::Function *, llvm::Function *> Reduction = 5368 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5369 RValue Func = RValue::get(Reduction.first); 5370 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5371 CGF.EmitIgnoredExpr(ReductionOp); 5372 return; 5373 } 5374 CGF.EmitIgnoredExpr(ReductionOp); 5375 } 5376 5377 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5378 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5379 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5380 ArrayRef<const Expr *> ReductionOps) { 5381 ASTContext &C = CGM.getContext(); 5382 5383 // void reduction_func(void *LHSArg, void *RHSArg); 5384 FunctionArgList Args; 5385 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5386 ImplicitParamDecl::Other); 5387 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5388 ImplicitParamDecl::Other); 5389 Args.push_back(&LHSArg); 5390 Args.push_back(&RHSArg); 5391 const auto &CGFI = 5392 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5393 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5394 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5395 llvm::GlobalValue::InternalLinkage, Name, 5396 &CGM.getModule()); 5397 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5398 Fn->setDoesNotRecurse(); 
5399 CodeGenFunction CGF(CGM); 5400 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5401 5402 // Dst = (void*[n])(LHSArg); 5403 // Src = (void*[n])(RHSArg); 5404 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5405 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5406 ArgsType), CGF.getPointerAlign()); 5407 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5409 ArgsType), CGF.getPointerAlign()); 5410 5411 // ... 5412 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5413 // ... 5414 CodeGenFunction::OMPPrivateScope Scope(CGF); 5415 auto IPriv = Privates.begin(); 5416 unsigned Idx = 0; 5417 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5418 const auto *RHSVar = 5419 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5420 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5421 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5422 }); 5423 const auto *LHSVar = 5424 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5425 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5426 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5427 }); 5428 QualType PrivTy = (*IPriv)->getType(); 5429 if (PrivTy->isVariablyModifiedType()) { 5430 // Get array size and emit VLA type. 
5431 ++Idx; 5432 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5433 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5434 const VariableArrayType *VLA = 5435 CGF.getContext().getAsVariableArrayType(PrivTy); 5436 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5437 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5438 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5439 CGF.EmitVariablyModifiedType(PrivTy); 5440 } 5441 } 5442 Scope.Privatize(); 5443 IPriv = Privates.begin(); 5444 auto ILHS = LHSExprs.begin(); 5445 auto IRHS = RHSExprs.begin(); 5446 for (const Expr *E : ReductionOps) { 5447 if ((*IPriv)->getType()->isArrayType()) { 5448 // Emit reduction for array section. 5449 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5450 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5451 EmitOMPAggregateReduction( 5452 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5453 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5454 emitReductionCombiner(CGF, E); 5455 }); 5456 } else { 5457 // Emit reduction for array subscript or single variable. 5458 emitReductionCombiner(CGF, E); 5459 } 5460 ++IPriv; 5461 ++ILHS; 5462 ++IRHS; 5463 } 5464 Scope.ForceCleanup(); 5465 CGF.FinishFunction(); 5466 return Fn; 5467 } 5468 5469 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5470 const Expr *ReductionOp, 5471 const Expr *PrivateRef, 5472 const DeclRefExpr *LHS, 5473 const DeclRefExpr *RHS) { 5474 if (PrivateRef->getType()->isArrayType()) { 5475 // Emit reduction for array section. 
5476 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5477 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5478 EmitOMPAggregateReduction( 5479 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5480 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5481 emitReductionCombiner(CGF, ReductionOp); 5482 }); 5483 } else { 5484 // Emit reduction for array subscript or single variable. 5485 emitReductionCombiner(CGF, ReductionOp); 5486 } 5487 } 5488 5489 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5490 ArrayRef<const Expr *> Privates, 5491 ArrayRef<const Expr *> LHSExprs, 5492 ArrayRef<const Expr *> RHSExprs, 5493 ArrayRef<const Expr *> ReductionOps, 5494 ReductionOptionsTy Options) { 5495 if (!CGF.HaveInsertPoint()) 5496 return; 5497 5498 bool WithNowait = Options.WithNowait; 5499 bool SimpleReduction = Options.SimpleReduction; 5500 5501 // Next code should be emitted for reduction: 5502 // 5503 // static kmp_critical_name lock = { 0 }; 5504 // 5505 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5506 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5507 // ... 5508 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5509 // *(Type<n>-1*)rhs[<n>-1]); 5510 // } 5511 // 5512 // ... 5513 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5514 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5515 // RedList, reduce_func, &<lock>)) { 5516 // case 1: 5517 // ... 5518 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5519 // ... 5520 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5521 // break; 5522 // case 2: 5523 // ... 5524 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5525 // ... 5526 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5527 // break; 5528 // default:; 5529 // } 5530 // 5531 // if SimpleReduction is true, only the next code is generated: 5532 // ... 
5533 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5534 // ... 5535 5536 ASTContext &C = CGM.getContext(); 5537 5538 if (SimpleReduction) { 5539 CodeGenFunction::RunCleanupsScope Scope(CGF); 5540 auto IPriv = Privates.begin(); 5541 auto ILHS = LHSExprs.begin(); 5542 auto IRHS = RHSExprs.begin(); 5543 for (const Expr *E : ReductionOps) { 5544 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5545 cast<DeclRefExpr>(*IRHS)); 5546 ++IPriv; 5547 ++ILHS; 5548 ++IRHS; 5549 } 5550 return; 5551 } 5552 5553 // 1. Build a list of reduction variables. 5554 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5555 auto Size = RHSExprs.size(); 5556 for (const Expr *E : Privates) { 5557 if (E->getType()->isVariablyModifiedType()) 5558 // Reserve place for array size. 5559 ++Size; 5560 } 5561 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5562 QualType ReductionArrayTy = 5563 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5564 /*IndexTypeQuals=*/0); 5565 Address ReductionList = 5566 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5567 auto IPriv = Privates.begin(); 5568 unsigned Idx = 0; 5569 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5570 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5571 CGF.Builder.CreateStore( 5572 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5573 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5574 Elem); 5575 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5576 // Store array size. 5577 ++Idx; 5578 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5579 llvm::Value *Size = CGF.Builder.CreateIntCast( 5580 CGF.getVLASize( 5581 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5582 .NumElts, 5583 CGF.SizeTy, /*isSigned=*/false); 5584 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5585 Elem); 5586 } 5587 } 5588 5589 // 2. 
Emit reduce_func(). 5590 llvm::Function *ReductionFn = emitReductionFunction( 5591 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5592 LHSExprs, RHSExprs, ReductionOps); 5593 5594 // 3. Create static kmp_critical_name lock = { 0 }; 5595 std::string Name = getName({"reduction"}); 5596 llvm::Value *Lock = getCriticalRegionLock(Name); 5597 5598 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5599 // RedList, reduce_func, &<lock>); 5600 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5601 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5602 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5603 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5604 ReductionList.getPointer(), CGF.VoidPtrTy); 5605 llvm::Value *Args[] = { 5606 IdentTLoc, // ident_t *<loc> 5607 ThreadId, // i32 <gtid> 5608 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5609 ReductionArrayTySize, // size_type sizeof(RedList) 5610 RL, // void *RedList 5611 ReductionFn, // void (*) (void *, void *) <reduce_func> 5612 Lock // kmp_critical_name *&<lock> 5613 }; 5614 llvm::Value *Res = CGF.EmitRuntimeCall( 5615 OMPBuilder.getOrCreateRuntimeFunction( 5616 CGM.getModule(), 5617 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5618 Args); 5619 5620 // 5. Build switch(res) 5621 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5622 llvm::SwitchInst *SwInst = 5623 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5624 5625 // 6. Build case 1: 5626 // ... 5627 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5628 // ... 
5629 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5630 // break; 5631 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5632 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5633 CGF.EmitBlock(Case1BB); 5634 5635 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5636 llvm::Value *EndArgs[] = { 5637 IdentTLoc, // ident_t *<loc> 5638 ThreadId, // i32 <gtid> 5639 Lock // kmp_critical_name *&<lock> 5640 }; 5641 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5642 CodeGenFunction &CGF, PrePostActionTy &Action) { 5643 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5644 auto IPriv = Privates.begin(); 5645 auto ILHS = LHSExprs.begin(); 5646 auto IRHS = RHSExprs.begin(); 5647 for (const Expr *E : ReductionOps) { 5648 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5649 cast<DeclRefExpr>(*IRHS)); 5650 ++IPriv; 5651 ++ILHS; 5652 ++IRHS; 5653 } 5654 }; 5655 RegionCodeGenTy RCG(CodeGen); 5656 CommonActionTy Action( 5657 nullptr, llvm::None, 5658 OMPBuilder.getOrCreateRuntimeFunction( 5659 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5660 : OMPRTL___kmpc_end_reduce), 5661 EndArgs); 5662 RCG.setAction(Action); 5663 RCG(CGF); 5664 5665 CGF.EmitBranch(DefaultBB); 5666 5667 // 7. Build case 2: 5668 // ... 5669 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5670 // ... 
5671 // break; 5672 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5673 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5674 CGF.EmitBlock(Case2BB); 5675 5676 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5677 CodeGenFunction &CGF, PrePostActionTy &Action) { 5678 auto ILHS = LHSExprs.begin(); 5679 auto IRHS = RHSExprs.begin(); 5680 auto IPriv = Privates.begin(); 5681 for (const Expr *E : ReductionOps) { 5682 const Expr *XExpr = nullptr; 5683 const Expr *EExpr = nullptr; 5684 const Expr *UpExpr = nullptr; 5685 BinaryOperatorKind BO = BO_Comma; 5686 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5687 if (BO->getOpcode() == BO_Assign) { 5688 XExpr = BO->getLHS(); 5689 UpExpr = BO->getRHS(); 5690 } 5691 } 5692 // Try to emit update expression as a simple atomic. 5693 const Expr *RHSExpr = UpExpr; 5694 if (RHSExpr) { 5695 // Analyze RHS part of the whole expression. 5696 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5697 RHSExpr->IgnoreParenImpCasts())) { 5698 // If this is a conditional operator, analyze its condition for 5699 // min/max reduction operator. 
5700 RHSExpr = ACO->getCond(); 5701 } 5702 if (const auto *BORHS = 5703 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5704 EExpr = BORHS->getRHS(); 5705 BO = BORHS->getOpcode(); 5706 } 5707 } 5708 if (XExpr) { 5709 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5710 auto &&AtomicRedGen = [BO, VD, 5711 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5712 const Expr *EExpr, const Expr *UpExpr) { 5713 LValue X = CGF.EmitLValue(XExpr); 5714 RValue E; 5715 if (EExpr) 5716 E = CGF.EmitAnyExpr(EExpr); 5717 CGF.EmitOMPAtomicSimpleUpdateExpr( 5718 X, E, BO, /*IsXLHSInRHSPart=*/true, 5719 llvm::AtomicOrdering::Monotonic, Loc, 5720 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5721 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5722 PrivateScope.addPrivate( 5723 VD, [&CGF, VD, XRValue, Loc]() { 5724 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5725 CGF.emitOMPSimpleStore( 5726 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5727 VD->getType().getNonReferenceType(), Loc); 5728 return LHSTemp; 5729 }); 5730 (void)PrivateScope.Privatize(); 5731 return CGF.EmitAnyExpr(UpExpr); 5732 }); 5733 }; 5734 if ((*IPriv)->getType()->isArrayType()) { 5735 // Emit atomic reduction for array section. 5736 const auto *RHSVar = 5737 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5738 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5739 AtomicRedGen, XExpr, EExpr, UpExpr); 5740 } else { 5741 // Emit atomic reduction for array subscript or single variable. 5742 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5743 } 5744 } else { 5745 // Emit as a critical region. 
5746 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5747 const Expr *, const Expr *) { 5748 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5749 std::string Name = RT.getName({"atomic_reduction"}); 5750 RT.emitCriticalRegion( 5751 CGF, Name, 5752 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5753 Action.Enter(CGF); 5754 emitReductionCombiner(CGF, E); 5755 }, 5756 Loc); 5757 }; 5758 if ((*IPriv)->getType()->isArrayType()) { 5759 const auto *LHSVar = 5760 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5761 const auto *RHSVar = 5762 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5763 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5764 CritRedGen); 5765 } else { 5766 CritRedGen(CGF, nullptr, nullptr, nullptr); 5767 } 5768 } 5769 ++ILHS; 5770 ++IRHS; 5771 ++IPriv; 5772 } 5773 }; 5774 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5775 if (!WithNowait) { 5776 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5777 llvm::Value *EndArgs[] = { 5778 IdentTLoc, // ident_t *<loc> 5779 ThreadId, // i32 <gtid> 5780 Lock // kmp_critical_name *&<lock> 5781 }; 5782 CommonActionTy Action(nullptr, llvm::None, 5783 OMPBuilder.getOrCreateRuntimeFunction( 5784 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5785 EndArgs); 5786 AtomicRCG.setAction(Action); 5787 AtomicRCG(CGF); 5788 } else { 5789 AtomicRCG(CGF); 5790 } 5791 5792 CGF.EmitBranch(DefaultBB); 5793 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5794 } 5795 5796 /// Generates unique name for artificial threadprivate variables. 5797 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5798 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5799 const Expr *Ref) { 5800 SmallString<256> Buffer; 5801 llvm::raw_svector_ostream Out(Buffer); 5802 const clang::DeclRefExpr *DE; 5803 const VarDecl *D = ::getBaseDecl(Ref, DE); 5804 if (!D) 5805 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5806 D = D->getCanonicalDecl(); 5807 std::string Name = CGM.getOpenMPRuntime().getName( 5808 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5809 Out << Prefix << Name << "_" 5810 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5811 return std::string(Out.str()); 5812 } 5813 5814 /// Emits reduction initializer function: 5815 /// \code 5816 /// void @.red_init(void* %arg, void* %orig) { 5817 /// %0 = bitcast void* %arg to <type>* 5818 /// store <type> <init>, <type>* %0 5819 /// ret void 5820 /// } 5821 /// \endcode 5822 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5823 SourceLocation Loc, 5824 ReductionCodeGen &RCG, unsigned N) { 5825 ASTContext &C = CGM.getContext(); 5826 QualType VoidPtrTy = C.VoidPtrTy; 5827 VoidPtrTy.addRestrict(); 5828 FunctionArgList Args; 5829 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5830 ImplicitParamDecl::Other); 5831 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5832 ImplicitParamDecl::Other); 5833 Args.emplace_back(&Param); 5834 Args.emplace_back(&ParamOrig); 5835 const auto &FnInfo = 5836 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5837 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5838 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5839 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5840 Name, &CGM.getModule()); 5841 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5842 Fn->setDoesNotRecurse(); 5843 CodeGenFunction CGF(CGM); 5844 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5845 Address PrivateAddr = CGF.EmitLoadOfPointer( 5846 CGF.GetAddrOfLocalVar(&Param), 5847 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5848 llvm::Value *Size = nullptr; 5849 // If the size of the reduction item is non-constant, load it from global 5850 // threadprivate variable. 5851 if (RCG.getSizes(N).second) { 5852 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5853 CGF, CGM.getContext().getSizeType(), 5854 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5855 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5856 CGM.getContext().getSizeType(), Loc); 5857 } 5858 RCG.emitAggregateType(CGF, N, Size); 5859 LValue OrigLVal; 5860 // If initializer uses initializer from declare reduction construct, emit a 5861 // pointer to the address of the original reduction item (reuired by reduction 5862 // initializer) 5863 if (RCG.usesReductionInitializer(N)) { 5864 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5865 SharedAddr = CGF.EmitLoadOfPointer( 5866 SharedAddr, 5867 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5868 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5869 } else { 5870 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5871 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5872 CGM.getContext().VoidPtrTy); 5873 } 5874 // Emit the initializer: 5875 // %0 = bitcast void* %arg to <type>* 5876 // store <type> <init>, <type>* %0 5877 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5878 [](CodeGenFunction &) { return false; }); 5879 CGF.FinishFunction(); 5880 return Fn; 5881 } 5882 5883 /// Emits reduction combiner function: 5884 /// \code 5885 /// void @.red_comb(void* %arg0, void* %arg1) { 5886 /// %lhs = bitcast void* %arg0 to <type>* 5887 /// %rhs = bitcast void* %arg1 to <type>* 5888 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5889 /// store <type> %2, <type>* %lhs 5890 /// 
ret void 5891 /// } 5892 /// \endcode 5893 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5894 SourceLocation Loc, 5895 ReductionCodeGen &RCG, unsigned N, 5896 const Expr *ReductionOp, 5897 const Expr *LHS, const Expr *RHS, 5898 const Expr *PrivateRef) { 5899 ASTContext &C = CGM.getContext(); 5900 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5901 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5902 FunctionArgList Args; 5903 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5904 C.VoidPtrTy, ImplicitParamDecl::Other); 5905 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5906 ImplicitParamDecl::Other); 5907 Args.emplace_back(&ParamInOut); 5908 Args.emplace_back(&ParamIn); 5909 const auto &FnInfo = 5910 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5911 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5912 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5913 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5914 Name, &CGM.getModule()); 5915 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5916 Fn->setDoesNotRecurse(); 5917 CodeGenFunction CGF(CGM); 5918 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5919 llvm::Value *Size = nullptr; 5920 // If the size of the reduction item is non-constant, load it from global 5921 // threadprivate variable. 5922 if (RCG.getSizes(N).second) { 5923 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5924 CGF, CGM.getContext().getSizeType(), 5925 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5926 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5927 CGM.getContext().getSizeType(), Loc); 5928 } 5929 RCG.emitAggregateType(CGF, N, Size); 5930 // Remap lhs and rhs variables to the addresses of the function arguments. 
5931 // %lhs = bitcast void* %arg0 to <type>* 5932 // %rhs = bitcast void* %arg1 to <type>* 5933 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5934 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5935 // Pull out the pointer to the variable. 5936 Address PtrAddr = CGF.EmitLoadOfPointer( 5937 CGF.GetAddrOfLocalVar(&ParamInOut), 5938 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5939 return CGF.Builder.CreateElementBitCast( 5940 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5941 }); 5942 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5943 // Pull out the pointer to the variable. 5944 Address PtrAddr = CGF.EmitLoadOfPointer( 5945 CGF.GetAddrOfLocalVar(&ParamIn), 5946 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5947 return CGF.Builder.CreateElementBitCast( 5948 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5949 }); 5950 PrivateScope.Privatize(); 5951 // Emit the combiner body: 5952 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5953 // store <type> %2, <type>* %lhs 5954 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5955 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5956 cast<DeclRefExpr>(RHS)); 5957 CGF.FinishFunction(); 5958 return Fn; 5959 } 5960 5961 /// Emits reduction finalizer function: 5962 /// \code 5963 /// void @.red_fini(void* %arg) { 5964 /// %0 = bitcast void* %arg to <type>* 5965 /// <destroy>(<type>* %0) 5966 /// ret void 5967 /// } 5968 /// \endcode 5969 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5970 SourceLocation Loc, 5971 ReductionCodeGen &RCG, unsigned N) { 5972 if (!RCG.needCleanups(N)) 5973 return nullptr; 5974 ASTContext &C = CGM.getContext(); 5975 FunctionArgList Args; 5976 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5977 ImplicitParamDecl::Other); 5978 Args.emplace_back(&Param); 5979 const auto &FnInfo = 5980 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5981 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5982 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5983 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5984 Name, &CGM.getModule()); 5985 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5986 Fn->setDoesNotRecurse(); 5987 CodeGenFunction CGF(CGM); 5988 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5989 Address PrivateAddr = CGF.EmitLoadOfPointer( 5990 CGF.GetAddrOfLocalVar(&Param), 5991 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5992 llvm::Value *Size = nullptr; 5993 // If the size of the reduction item is non-constant, load it from global 5994 // threadprivate variable. 5995 if (RCG.getSizes(N).second) { 5996 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5997 CGF, CGM.getContext().getSizeType(), 5998 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5999 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6000 CGM.getContext().getSizeType(), Loc); 6001 } 6002 RCG.emitAggregateType(CGF, N, Size); 6003 // Emit the finalizer body: 6004 // <destroy>(<type>* %0) 6005 RCG.emitCleanups(CGF, N, PrivateAddr); 6006 CGF.FinishFunction(Loc); 6007 return Fn; 6008 } 6009 6010 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6011 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6012 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6013 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6014 return nullptr; 6015 6016 // Build typedef struct: 6017 // kmp_taskred_input { 6018 // void *reduce_shar; // shared reduction item 6019 // void *reduce_orig; // original reduction item used for initialization 6020 // size_t reduce_size; // size of data item 6021 // void *reduce_init; // data initialization routine 6022 // void *reduce_fini; // data finalization routine 6023 // void *reduce_comb; // data combiner routine 6024 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6025 // } kmp_taskred_input_t; 6026 ASTContext &C = CGM.getContext(); 6027 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6028 RD->startDefinition(); 6029 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6030 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6031 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6032 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6033 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6034 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6035 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6036 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6037 RD->completeDefinition(); 6038 QualType RDType = C.getRecordType(RD); 6039 unsigned Size = Data.ReductionVars.size(); 6040 llvm::APInt ArraySize(/*numBits=*/64, Size); 6041 QualType ArrayRDType = C.getConstantArrayType( 6042 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6043 // kmp_task_red_input_t .rd_input.[Size]; 6044 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6045 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6046 Data.ReductionCopies, Data.ReductionOps); 6047 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6048 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6049 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6050 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6051 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6052 TaskRedInput.getPointer(), Idxs, 6053 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6054 ".rd_input.gep."); 6055 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6056 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6057 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6058 RCG.emitSharedOrigLValue(CGF, Cnt); 6059 llvm::Value *CastedShared = 6060 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6061 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6062 // ElemLVal.reduce_orig = &Origs[Cnt]; 6063 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6064 llvm::Value *CastedOrig = 6065 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6066 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6067 RCG.emitAggregateType(CGF, Cnt); 6068 llvm::Value *SizeValInChars; 6069 llvm::Value *SizeVal; 6070 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6071 // We use delayed creation/initialization for VLAs and array sections. It is 6072 // required because runtime does not provide the way to pass the sizes of 6073 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6074 // threadprivate global variables are used to store these values and use 6075 // them in the functions. 6076 bool DelayedCreation = !!SizeVal; 6077 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6078 /*isSigned=*/false); 6079 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6080 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6081 // ElemLVal.reduce_init = init; 6082 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6083 llvm::Value *InitAddr = 6084 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6085 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6086 // ElemLVal.reduce_fini = fini; 6087 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6088 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6089 llvm::Value *FiniAddr = Fini 6090 ? 
CGF.EmitCastToVoidPtr(Fini) 6091 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6092 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6093 // ElemLVal.reduce_comb = comb; 6094 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6095 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6096 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6097 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6098 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6099 // ElemLVal.flags = 0; 6100 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6101 if (DelayedCreation) { 6102 CGF.EmitStoreOfScalar( 6103 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6104 FlagsLVal); 6105 } else 6106 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6107 FlagsLVal.getType()); 6108 } 6109 if (Data.IsReductionWithTaskMod) { 6110 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6111 // is_ws, int num, void *data); 6112 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6113 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6114 CGM.IntTy, /*isSigned=*/true); 6115 llvm::Value *Args[] = { 6116 IdentTLoc, GTid, 6117 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6118 /*isSigned=*/true), 6119 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6120 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6121 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6122 return CGF.EmitRuntimeCall( 6123 OMPBuilder.getOrCreateRuntimeFunction( 6124 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6125 Args); 6126 } 6127 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6128 llvm::Value *Args[] = { 6129 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6130 /*isSigned=*/true), 6131 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6132 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6133 CGM.VoidPtrTy)}; 6134 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6135 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6136 Args); 6137 } 6138 6139 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6140 SourceLocation Loc, 6141 bool IsWorksharingReduction) { 6142 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6143 // is_ws, int num, void *data); 6144 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6145 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6146 CGM.IntTy, /*isSigned=*/true); 6147 llvm::Value *Args[] = {IdentTLoc, GTid, 6148 llvm::ConstantInt::get(CGM.IntTy, 6149 IsWorksharingReduction ? 1 : 0, 6150 /*isSigned=*/true)}; 6151 (void)CGF.EmitRuntimeCall( 6152 OMPBuilder.getOrCreateRuntimeFunction( 6153 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6154 Args); 6155 } 6156 6157 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6158 SourceLocation Loc, 6159 ReductionCodeGen &RCG, 6160 unsigned N) { 6161 auto Sizes = RCG.getSizes(N); 6162 // Emit threadprivate global variable if the type is non-constant 6163 // (Sizes.second = nullptr). 
6164 if (Sizes.second) { 6165 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6166 /*isSigned=*/false); 6167 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6168 CGF, CGM.getContext().getSizeType(), 6169 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6170 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6171 } 6172 } 6173 6174 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6175 SourceLocation Loc, 6176 llvm::Value *ReductionsPtr, 6177 LValue SharedLVal) { 6178 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6179 // *d); 6180 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6181 CGM.IntTy, 6182 /*isSigned=*/true), 6183 ReductionsPtr, 6184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6185 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6186 return Address( 6187 CGF.EmitRuntimeCall( 6188 OMPBuilder.getOrCreateRuntimeFunction( 6189 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6190 Args), 6191 SharedLVal.getAlignment()); 6192 } 6193 6194 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6195 SourceLocation Loc) { 6196 if (!CGF.HaveInsertPoint()) 6197 return; 6198 6199 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6200 OMPBuilder.CreateTaskwait(CGF.Builder); 6201 } else { 6202 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6203 // global_tid); 6204 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6205 // Ignore return result until untied tasks are supported. 
6206 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6207 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6208 Args); 6209 } 6210 6211 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6212 Region->emitUntiedSwitch(CGF); 6213 } 6214 6215 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6216 OpenMPDirectiveKind InnerKind, 6217 const RegionCodeGenTy &CodeGen, 6218 bool HasCancel) { 6219 if (!CGF.HaveInsertPoint()) 6220 return; 6221 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6222 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6223 } 6224 6225 namespace { 6226 enum RTCancelKind { 6227 CancelNoreq = 0, 6228 CancelParallel = 1, 6229 CancelLoop = 2, 6230 CancelSections = 3, 6231 CancelTaskgroup = 4 6232 }; 6233 } // anonymous namespace 6234 6235 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6236 RTCancelKind CancelKind = CancelNoreq; 6237 if (CancelRegion == OMPD_parallel) 6238 CancelKind = CancelParallel; 6239 else if (CancelRegion == OMPD_for) 6240 CancelKind = CancelLoop; 6241 else if (CancelRegion == OMPD_sections) 6242 CancelKind = CancelSections; 6243 else { 6244 assert(CancelRegion == OMPD_taskgroup); 6245 CancelKind = CancelTaskgroup; 6246 } 6247 return CancelKind; 6248 } 6249 6250 void CGOpenMPRuntime::emitCancellationPointCall( 6251 CodeGenFunction &CGF, SourceLocation Loc, 6252 OpenMPDirectiveKind CancelRegion) { 6253 if (!CGF.HaveInsertPoint()) 6254 return; 6255 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6256 // global_tid, kmp_int32 cncl_kind); 6257 if (auto *OMPRegionInfo = 6258 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6259 // For 'cancellation point taskgroup', the task region info may not have a 6260 // cancel. This may instead happen in another adjacent task. 
6261 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6262 llvm::Value *Args[] = { 6263 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6264 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6265 // Ignore return result until untied tasks are supported. 6266 llvm::Value *Result = CGF.EmitRuntimeCall( 6267 OMPBuilder.getOrCreateRuntimeFunction( 6268 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6269 Args); 6270 // if (__kmpc_cancellationpoint()) { 6271 // exit from construct; 6272 // } 6273 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6274 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6275 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6276 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6277 CGF.EmitBlock(ExitBB); 6278 // exit from construct; 6279 CodeGenFunction::JumpDest CancelDest = 6280 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6281 CGF.EmitBranchThroughCleanup(CancelDest); 6282 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6283 } 6284 } 6285 } 6286 6287 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6288 const Expr *IfCond, 6289 OpenMPDirectiveKind CancelRegion) { 6290 if (!CGF.HaveInsertPoint()) 6291 return; 6292 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6293 // kmp_int32 cncl_kind); 6294 auto &M = CGM.getModule(); 6295 if (auto *OMPRegionInfo = 6296 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6297 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6298 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6299 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6300 llvm::Value *Args[] = { 6301 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6302 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6303 // Ignore return result until untied tasks are supported. 
6304 llvm::Value *Result = CGF.EmitRuntimeCall( 6305 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6306 // if (__kmpc_cancel()) { 6307 // exit from construct; 6308 // } 6309 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6310 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6311 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6312 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6313 CGF.EmitBlock(ExitBB); 6314 // exit from construct; 6315 CodeGenFunction::JumpDest CancelDest = 6316 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6317 CGF.EmitBranchThroughCleanup(CancelDest); 6318 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6319 }; 6320 if (IfCond) { 6321 emitIfClause(CGF, IfCond, ThenGen, 6322 [](CodeGenFunction &, PrePostActionTy &) {}); 6323 } else { 6324 RegionCodeGenTy ThenRCG(ThenGen); 6325 ThenRCG(CGF); 6326 } 6327 } 6328 } 6329 6330 namespace { 6331 /// Cleanup action for uses_allocators support. 
6332 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6333 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6334 6335 public: 6336 OMPUsesAllocatorsActionTy( 6337 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6338 : Allocators(Allocators) {} 6339 void Enter(CodeGenFunction &CGF) override { 6340 if (!CGF.HaveInsertPoint()) 6341 return; 6342 for (const auto &AllocatorData : Allocators) { 6343 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6344 CGF, AllocatorData.first, AllocatorData.second); 6345 } 6346 } 6347 void Exit(CodeGenFunction &CGF) override { 6348 if (!CGF.HaveInsertPoint()) 6349 return; 6350 for (const auto &AllocatorData : Allocators) { 6351 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6352 AllocatorData.first); 6353 } 6354 } 6355 }; 6356 } // namespace 6357 6358 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6359 const OMPExecutableDirective &D, StringRef ParentName, 6360 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6361 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6362 assert(!ParentName.empty() && "Invalid target region parent name!"); 6363 HasEmittedTargetRegion = true; 6364 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6365 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6366 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6367 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6368 if (!D.AllocatorTraits) 6369 continue; 6370 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6371 } 6372 } 6373 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6374 CodeGen.setAction(UsesAllocatorAction); 6375 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6376 IsOffloadEntry, CodeGen); 6377 } 6378 6379 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6380 const Expr *Allocator, 6381 const Expr *AllocatorTraits) { 6382 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6383 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6384 // Use default memspace handle. 6385 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6386 llvm::Value *NumTraits = llvm::ConstantInt::get( 6387 CGF.IntTy, cast<ConstantArrayType>( 6388 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6389 ->getSize() 6390 .getLimitedValue()); 6391 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6392 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6393 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6394 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6395 AllocatorTraitsLVal.getBaseInfo(), 6396 AllocatorTraitsLVal.getTBAAInfo()); 6397 llvm::Value *Traits = 6398 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6399 6400 llvm::Value *AllocatorVal = 6401 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6402 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6403 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6404 // Store to allocator. 
6405 CGF.EmitVarDecl(*cast<VarDecl>( 6406 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6407 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6408 AllocatorVal = 6409 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6410 Allocator->getType(), Allocator->getExprLoc()); 6411 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6412 } 6413 6414 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6415 const Expr *Allocator) { 6416 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6417 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6418 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6419 llvm::Value *AllocatorVal = 6420 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6421 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6422 CGF.getContext().VoidPtrTy, 6423 Allocator->getExprLoc()); 6424 (void)CGF.EmitRuntimeCall( 6425 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6426 OMPRTL___kmpc_destroy_allocator), 6427 {ThreadId, AllocatorVal}); 6428 } 6429 6430 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6431 const OMPExecutableDirective &D, StringRef ParentName, 6432 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6433 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6434 // Create a unique name for the entry function using the source location 6435 // information of the current target region. The name will be something like: 6436 // 6437 // __omp_offloading_DD_FFFF_PP_lBB 6438 // 6439 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6440 // mangled name of the function that encloses the target region and BB is the 6441 // line number of the target region. 
6442 6443 unsigned DeviceID; 6444 unsigned FileID; 6445 unsigned Line; 6446 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6447 Line); 6448 SmallString<64> EntryFnName; 6449 { 6450 llvm::raw_svector_ostream OS(EntryFnName); 6451 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6452 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6453 } 6454 6455 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6456 6457 CodeGenFunction CGF(CGM, true); 6458 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6459 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6460 6461 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6462 6463 // If this target outline function is not an offload entry, we don't need to 6464 // register it. 6465 if (!IsOffloadEntry) 6466 return; 6467 6468 // The target region ID is used by the runtime library to identify the current 6469 // target region, so it only has to be unique and not necessarily point to 6470 // anything. It could be the pointer to the outlined function that implements 6471 // the target region, but we aren't using that so that the compiler doesn't 6472 // need to keep that, and could therefore inline the host function if proven 6473 // worthwhile during optimization. In the other hand, if emitting code for the 6474 // device, the ID has to be the function address so that it can retrieved from 6475 // the offloading entry and launched by the runtime library. We also mark the 6476 // outlined function to have external linkage in case we are emitting code for 6477 // the device, because these functions will be entry points to the device. 
6478 6479 if (CGM.getLangOpts().OpenMPIsDevice) { 6480 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6481 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6482 OutlinedFn->setDSOLocal(false); 6483 } else { 6484 std::string Name = getName({EntryFnName, "region_id"}); 6485 OutlinedFnID = new llvm::GlobalVariable( 6486 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6487 llvm::GlobalValue::WeakAnyLinkage, 6488 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6489 } 6490 6491 // Register the information for the entry associated with this target region. 6492 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6493 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6494 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6495 } 6496 6497 /// Checks if the expression is constant or does not have non-trivial function 6498 /// calls. 6499 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6500 // We can skip constant expressions. 6501 // We can skip expressions with trivial calls or simple expressions. 6502 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6503 !E->hasNonTrivialCall(Ctx)) && 6504 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6505 } 6506 6507 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6508 const Stmt *Body) { 6509 const Stmt *Child = Body->IgnoreContainers(); 6510 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6511 Child = nullptr; 6512 for (const Stmt *S : C->body()) { 6513 if (const auto *E = dyn_cast<Expr>(S)) { 6514 if (isTrivial(Ctx, E)) 6515 continue; 6516 } 6517 // Some of the statements can be ignored. 6518 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6519 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6520 continue; 6521 // Analyze declarations. 
6522 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6523 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6524 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6525 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6526 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6527 isa<UsingDirectiveDecl>(D) || 6528 isa<OMPDeclareReductionDecl>(D) || 6529 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6530 return true; 6531 const auto *VD = dyn_cast<VarDecl>(D); 6532 if (!VD) 6533 return false; 6534 return VD->isConstexpr() || 6535 ((VD->getType().isTrivialType(Ctx) || 6536 VD->getType()->isReferenceType()) && 6537 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6538 })) 6539 continue; 6540 } 6541 // Found multiple children - cannot get the one child only. 6542 if (Child) 6543 return nullptr; 6544 Child = S; 6545 } 6546 if (Child) 6547 Child = Child->IgnoreContainers(); 6548 } 6549 return Child; 6550 } 6551 6552 /// Emit the number of teams for a target directive. Inspect the num_teams 6553 /// clause associated with a teams construct combined or closely nested 6554 /// with the target directive. 6555 /// 6556 /// Emit a team of size one for directives such as 'target parallel' that 6557 /// have no associated teams construct. 6558 /// 6559 /// Otherwise, return nullptr. 
6560 static llvm::Value * 6561 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6562 const OMPExecutableDirective &D) { 6563 assert(!CGF.getLangOpts().OpenMPIsDevice && 6564 "Clauses associated with the teams directive expected to be emitted " 6565 "only for the host!"); 6566 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6567 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6568 "Expected target-based executable directive."); 6569 CGBuilderTy &Bld = CGF.Builder; 6570 switch (DirectiveKind) { 6571 case OMPD_target: { 6572 const auto *CS = D.getInnermostCapturedStmt(); 6573 const auto *Body = 6574 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6575 const Stmt *ChildStmt = 6576 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6577 if (const auto *NestedDir = 6578 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6579 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6580 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6581 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6582 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6583 const Expr *NumTeams = 6584 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6585 llvm::Value *NumTeamsVal = 6586 CGF.EmitScalarExpr(NumTeams, 6587 /*IgnoreResultAssign*/ true); 6588 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6589 /*isSigned=*/true); 6590 } 6591 return Bld.getInt32(0); 6592 } 6593 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6594 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6595 return Bld.getInt32(1); 6596 return Bld.getInt32(0); 6597 } 6598 return nullptr; 6599 } 6600 case OMPD_target_teams: 6601 case OMPD_target_teams_distribute: 6602 case OMPD_target_teams_distribute_simd: 6603 case OMPD_target_teams_distribute_parallel_for: 6604 case OMPD_target_teams_distribute_parallel_for_simd: { 6605 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6606 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6607 const Expr *NumTeams = 6608 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6609 llvm::Value *NumTeamsVal = 6610 CGF.EmitScalarExpr(NumTeams, 6611 /*IgnoreResultAssign*/ true); 6612 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6613 /*isSigned=*/true); 6614 } 6615 return Bld.getInt32(0); 6616 } 6617 case OMPD_target_parallel: 6618 case OMPD_target_parallel_for: 6619 case OMPD_target_parallel_for_simd: 6620 case OMPD_target_simd: 6621 return Bld.getInt32(1); 6622 case OMPD_parallel: 6623 case OMPD_for: 6624 case OMPD_parallel_for: 6625 case OMPD_parallel_master: 6626 case OMPD_parallel_sections: 6627 case OMPD_for_simd: 6628 case OMPD_parallel_for_simd: 6629 case OMPD_cancel: 6630 case OMPD_cancellation_point: 6631 case OMPD_ordered: 6632 case OMPD_threadprivate: 6633 case OMPD_allocate: 6634 case OMPD_task: 6635 case OMPD_simd: 6636 case OMPD_sections: 6637 case OMPD_section: 6638 case OMPD_single: 6639 case OMPD_master: 6640 case OMPD_critical: 6641 case OMPD_taskyield: 6642 case OMPD_barrier: 6643 case OMPD_taskwait: 6644 case OMPD_taskgroup: 6645 case OMPD_atomic: 6646 case OMPD_flush: 6647 case OMPD_depobj: 6648 case OMPD_scan: 6649 case OMPD_teams: 6650 case OMPD_target_data: 6651 case OMPD_target_exit_data: 6652 case OMPD_target_enter_data: 6653 case OMPD_distribute: 6654 case OMPD_distribute_simd: 6655 case OMPD_distribute_parallel_for: 6656 case OMPD_distribute_parallel_for_simd: 6657 case OMPD_teams_distribute: 6658 case OMPD_teams_distribute_simd: 6659 case OMPD_teams_distribute_parallel_for: 6660 case OMPD_teams_distribute_parallel_for_simd: 6661 case OMPD_target_update: 6662 case OMPD_declare_simd: 6663 case OMPD_declare_variant: 6664 case OMPD_begin_declare_variant: 6665 case OMPD_end_declare_variant: 6666 case OMPD_declare_target: 6667 case OMPD_end_declare_target: 6668 case OMPD_declare_reduction: 6669 case OMPD_declare_mapper: 6670 case OMPD_taskloop: 6671 case OMPD_taskloop_simd: 6672 case OMPD_master_taskloop: 
6673 case OMPD_master_taskloop_simd: 6674 case OMPD_parallel_master_taskloop: 6675 case OMPD_parallel_master_taskloop_simd: 6676 case OMPD_requires: 6677 case OMPD_unknown: 6678 break; 6679 default: 6680 break; 6681 } 6682 llvm_unreachable("Unexpected directive kind."); 6683 } 6684 6685 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6686 llvm::Value *DefaultThreadLimitVal) { 6687 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6688 CGF.getContext(), CS->getCapturedStmt()); 6689 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6690 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6691 llvm::Value *NumThreads = nullptr; 6692 llvm::Value *CondVal = nullptr; 6693 // Handle if clause. If if clause present, the number of threads is 6694 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6695 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6696 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6697 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6698 const OMPIfClause *IfClause = nullptr; 6699 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6700 if (C->getNameModifier() == OMPD_unknown || 6701 C->getNameModifier() == OMPD_parallel) { 6702 IfClause = C; 6703 break; 6704 } 6705 } 6706 if (IfClause) { 6707 const Expr *Cond = IfClause->getCondition(); 6708 bool Result; 6709 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6710 if (!Result) 6711 return CGF.Builder.getInt32(1); 6712 } else { 6713 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6714 if (const auto *PreInit = 6715 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6716 for (const auto *I : PreInit->decls()) { 6717 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6718 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6719 } else { 6720 CodeGenFunction::AutoVarEmission Emission = 6721 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6722 CGF.EmitAutoVarCleanups(Emission); 6723 } 6724 } 6725 } 6726 
CondVal = CGF.EvaluateExprAsBool(Cond); 6727 } 6728 } 6729 } 6730 // Check the value of num_threads clause iff if clause was not specified 6731 // or is not evaluated to false. 6732 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6733 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6734 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6735 const auto *NumThreadsClause = 6736 Dir->getSingleClause<OMPNumThreadsClause>(); 6737 CodeGenFunction::LexicalScope Scope( 6738 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6739 if (const auto *PreInit = 6740 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6741 for (const auto *I : PreInit->decls()) { 6742 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6743 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6744 } else { 6745 CodeGenFunction::AutoVarEmission Emission = 6746 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6747 CGF.EmitAutoVarCleanups(Emission); 6748 } 6749 } 6750 } 6751 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6752 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6753 /*isSigned=*/false); 6754 if (DefaultThreadLimitVal) 6755 NumThreads = CGF.Builder.CreateSelect( 6756 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6757 DefaultThreadLimitVal, NumThreads); 6758 } else { 6759 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6760 : CGF.Builder.getInt32(0); 6761 } 6762 // Process condition of the if clause. 6763 if (CondVal) { 6764 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6765 CGF.Builder.getInt32(1)); 6766 } 6767 return NumThreads; 6768 } 6769 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6770 return CGF.Builder.getInt32(1); 6771 return DefaultThreadLimitVal; 6772 } 6773 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6774 : CGF.Builder.getInt32(0); 6775 } 6776 6777 /// Emit the number of threads for a target directive. 
Inspect the 6778 /// thread_limit clause associated with a teams construct combined or closely 6779 /// nested with the target directive. 6780 /// 6781 /// Emit the num_threads clause for directives such as 'target parallel' that 6782 /// have no associated teams construct. 6783 /// 6784 /// Otherwise, return nullptr. 6785 static llvm::Value * 6786 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6787 const OMPExecutableDirective &D) { 6788 assert(!CGF.getLangOpts().OpenMPIsDevice && 6789 "Clauses associated with the teams directive expected to be emitted " 6790 "only for the host!"); 6791 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6792 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6793 "Expected target-based executable directive."); 6794 CGBuilderTy &Bld = CGF.Builder; 6795 llvm::Value *ThreadLimitVal = nullptr; 6796 llvm::Value *NumThreadsVal = nullptr; 6797 switch (DirectiveKind) { 6798 case OMPD_target: { 6799 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6800 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6801 return NumThreads; 6802 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6803 CGF.getContext(), CS->getCapturedStmt()); 6804 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6805 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6806 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6807 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6808 const auto *ThreadLimitClause = 6809 Dir->getSingleClause<OMPThreadLimitClause>(); 6810 CodeGenFunction::LexicalScope Scope( 6811 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6812 if (const auto *PreInit = 6813 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6814 for (const auto *I : PreInit->decls()) { 6815 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6816 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6817 } else { 6818 CodeGenFunction::AutoVarEmission Emission = 6819 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6820 CGF.EmitAutoVarCleanups(Emission); 6821 } 6822 } 6823 } 6824 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6825 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6826 ThreadLimitVal = 6827 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6828 } 6829 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6830 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6831 CS = Dir->getInnermostCapturedStmt(); 6832 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6833 CGF.getContext(), CS->getCapturedStmt()); 6834 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6835 } 6836 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6837 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6838 CS = Dir->getInnermostCapturedStmt(); 6839 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6840 return NumThreads; 6841 } 6842 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6843 return Bld.getInt32(1); 6844 } 6845 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6846 } 6847 case OMPD_target_teams: { 6848 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6849 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6850 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6851 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6852 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6853 ThreadLimitVal = 6854 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6855 } 6856 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6857 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6858 return NumThreads; 6859 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6860 CGF.getContext(), CS->getCapturedStmt()); 6861 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6862 if (Dir->getDirectiveKind() == OMPD_distribute) { 6863 CS = Dir->getInnermostCapturedStmt(); 6864 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6865 return NumThreads; 6866 } 6867 } 6868 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6869 } 6870 case OMPD_target_teams_distribute: 6871 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6872 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6873 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6874 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6875 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6876 ThreadLimitVal = 6877 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6878 } 6879 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6880 case OMPD_target_parallel: 6881 case OMPD_target_parallel_for: 6882 case OMPD_target_parallel_for_simd: 6883 case OMPD_target_teams_distribute_parallel_for: 6884 case OMPD_target_teams_distribute_parallel_for_simd: { 6885 llvm::Value *CondVal = nullptr; 6886 // Handle if clause. 
If if clause present, the number of threads is 6887 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6888 if (D.hasClausesOfKind<OMPIfClause>()) { 6889 const OMPIfClause *IfClause = nullptr; 6890 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6891 if (C->getNameModifier() == OMPD_unknown || 6892 C->getNameModifier() == OMPD_parallel) { 6893 IfClause = C; 6894 break; 6895 } 6896 } 6897 if (IfClause) { 6898 const Expr *Cond = IfClause->getCondition(); 6899 bool Result; 6900 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6901 if (!Result) 6902 return Bld.getInt32(1); 6903 } else { 6904 CodeGenFunction::RunCleanupsScope Scope(CGF); 6905 CondVal = CGF.EvaluateExprAsBool(Cond); 6906 } 6907 } 6908 } 6909 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6910 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6911 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6912 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6913 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6914 ThreadLimitVal = 6915 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6916 } 6917 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6918 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6919 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6920 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6921 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6922 NumThreadsVal = 6923 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6924 ThreadLimitVal = ThreadLimitVal 6925 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6926 ThreadLimitVal), 6927 NumThreadsVal, ThreadLimitVal) 6928 : NumThreadsVal; 6929 } 6930 if (!ThreadLimitVal) 6931 ThreadLimitVal = Bld.getInt32(0); 6932 if (CondVal) 6933 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6934 return ThreadLimitVal; 6935 } 6936 case OMPD_target_teams_distribute_simd: 6937 case OMPD_target_simd: 6938 return Bld.getInt32(1); 6939 case OMPD_parallel: 6940 case OMPD_for: 6941 case OMPD_parallel_for: 6942 case OMPD_parallel_master: 6943 case OMPD_parallel_sections: 6944 case OMPD_for_simd: 6945 case OMPD_parallel_for_simd: 6946 case OMPD_cancel: 6947 case OMPD_cancellation_point: 6948 case OMPD_ordered: 6949 case OMPD_threadprivate: 6950 case OMPD_allocate: 6951 case OMPD_task: 6952 case OMPD_simd: 6953 case OMPD_sections: 6954 case OMPD_section: 6955 case OMPD_single: 6956 case OMPD_master: 6957 case OMPD_critical: 6958 case OMPD_taskyield: 6959 case OMPD_barrier: 6960 case OMPD_taskwait: 6961 case OMPD_taskgroup: 6962 case OMPD_atomic: 6963 case OMPD_flush: 6964 case OMPD_depobj: 6965 case OMPD_scan: 6966 case OMPD_teams: 6967 case OMPD_target_data: 6968 case OMPD_target_exit_data: 6969 case OMPD_target_enter_data: 6970 case OMPD_distribute: 6971 case OMPD_distribute_simd: 6972 case OMPD_distribute_parallel_for: 6973 case OMPD_distribute_parallel_for_simd: 6974 case OMPD_teams_distribute: 6975 case OMPD_teams_distribute_simd: 6976 case OMPD_teams_distribute_parallel_for: 6977 case OMPD_teams_distribute_parallel_for_simd: 6978 case OMPD_target_update: 6979 case OMPD_declare_simd: 6980 case OMPD_declare_variant: 6981 case OMPD_begin_declare_variant: 6982 case OMPD_end_declare_variant: 6983 case OMPD_declare_target: 6984 case OMPD_end_declare_target: 6985 case OMPD_declare_reduction: 6986 case OMPD_declare_mapper: 6987 case OMPD_taskloop: 6988 case OMPD_taskloop_simd: 6989 case OMPD_master_taskloop: 6990 case OMPD_master_taskloop_simd: 6991 case 
OMPD_parallel_master_taskloop: 6992 case OMPD_parallel_master_taskloop_simd: 6993 case OMPD_requires: 6994 case OMPD_unknown: 6995 break; 6996 default: 6997 break; 6998 } 6999 llvm_unreachable("Unsupported directive kind."); 7000 } 7001 7002 namespace { 7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7004 7005 // Utility to handle information from clauses associated with a given 7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7007 // It provides a convenient interface to obtain the information and generate 7008 // code for that information. 7009 class MappableExprsHandler { 7010 public: 7011 /// Values for bit flags used to specify the mapping type for 7012 /// offloading. 7013 enum OpenMPOffloadMappingFlags : uint64_t { 7014 /// No flags 7015 OMP_MAP_NONE = 0x0, 7016 /// Allocate memory on the device and move data from host to device. 7017 OMP_MAP_TO = 0x01, 7018 /// Allocate memory on the device and move data from device to host. 7019 OMP_MAP_FROM = 0x02, 7020 /// Always perform the requested mapping action on the element, even 7021 /// if it was already mapped before. 7022 OMP_MAP_ALWAYS = 0x04, 7023 /// Delete the element from the device environment, ignoring the 7024 /// current reference count associated with the element. 7025 OMP_MAP_DELETE = 0x08, 7026 /// The element being mapped is a pointer-pointee pair; both the 7027 /// pointer and the pointee should be mapped. 7028 OMP_MAP_PTR_AND_OBJ = 0x10, 7029 /// This flags signals that the base address of an entry should be 7030 /// passed to the target kernel as an argument. 7031 OMP_MAP_TARGET_PARAM = 0x20, 7032 /// Signal that the runtime library has to return the device pointer 7033 /// in the current position for the data being mapped. Used when we have the 7034 /// use_device_ptr or use_device_addr clause. 7035 OMP_MAP_RETURN_PARAM = 0x40, 7036 /// This flag signals that the reference being passed is a pointer to 7037 /// private data. 
7038 OMP_MAP_PRIVATE = 0x80, 7039 /// Pass the element to the device by value. 7040 OMP_MAP_LITERAL = 0x100, 7041 /// Implicit map 7042 OMP_MAP_IMPLICIT = 0x200, 7043 /// Close is a hint to the runtime to allocate memory close to 7044 /// the target device. 7045 OMP_MAP_CLOSE = 0x400, 7046 /// 0x800 is reserved for compatibility with XLC. 7047 /// Produce a runtime error if the data is not already allocated. 7048 OMP_MAP_PRESENT = 0x1000, 7049 /// The 16 MSBs of the flags indicate whether the entry is member of some 7050 /// struct/class. 7051 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7052 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7053 }; 7054 7055 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7056 static unsigned getFlagMemberOffset() { 7057 unsigned Offset = 0; 7058 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7059 Remain = Remain >> 1) 7060 Offset++; 7061 return Offset; 7062 } 7063 7064 /// Class that associates information with a base pointer to be passed to the 7065 /// runtime library. 7066 class BasePointerInfo { 7067 /// The base pointer. 7068 llvm::Value *Ptr = nullptr; 7069 /// The base declaration that refers to this device pointer, or null if 7070 /// there is none. 
7071 const ValueDecl *DevPtrDecl = nullptr; 7072 7073 public: 7074 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7075 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7076 llvm::Value *operator*() const { return Ptr; } 7077 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7078 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7079 }; 7080 7081 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7082 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7083 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7084 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7085 7086 /// This structure contains combined information generated for mappable 7087 /// clauses, including base pointers, pointers, sizes, map types, and 7088 /// user-defined mappers. 7089 struct MapCombinedInfoTy { 7090 MapBaseValuesArrayTy BasePointers; 7091 MapValuesArrayTy Pointers; 7092 MapValuesArrayTy Sizes; 7093 MapFlagsArrayTy Types; 7094 MapMappersArrayTy Mappers; 7095 7096 /// Append arrays in \a CurInfo. 7097 void append(MapCombinedInfoTy &CurInfo) { 7098 BasePointers.append(CurInfo.BasePointers.begin(), 7099 CurInfo.BasePointers.end()); 7100 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7101 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7102 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7103 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7104 } 7105 }; 7106 7107 /// Map between a struct and the its lowest & highest elements which have been 7108 /// mapped. 
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
///                    HE(FieldIndex, Pointer)}
struct StructRangeInfoTy {
  /// Field index and address of the lowest mapped element recorded so far.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
      0, Address::invalid()};
  /// Field index and address of the highest mapped element recorded so far.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
      0, Address::invalid()};
  /// Base address of the struct; stays invalid until a member is recorded.
  Address Base = Address::invalid();
};

private:
/// Arguments recorded for one mappable-expression component list: the list
/// itself, its map type and modifiers, whether the map is implicit, the
/// optional mapper declaration, and the ReturnDevicePointer / ForDeviceAddr
/// flags.
struct MapInfo {
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  bool ReturnDevicePointer = false;
  bool IsImplicit = false;
  const ValueDecl *Mapper = nullptr;
  bool ForDeviceAddr = false;

  MapInfo() = default;
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
      bool IsImplicit, const ValueDecl *Mapper = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  const Expr *IE = nullptr;
  const ValueDecl *VD = nullptr;
  bool ForDeviceAddr = false;

  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                           bool ForDeviceAddr)
      : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either an executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
CodeGenFunction &CGF;

/// Set of all first private variables in the current directive.
/// bool data is set to true if the variable is implicitly marked as
/// firstprivate, false otherwise.
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
llvm::DenseMap<
    const ValueDecl *,
    SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
    DevPointersMap;

/// Emit and return the size to be mapped for the expression \a E.
///
/// - Array shaping expression: size of the base's pointee type multiplied by
///   every dimension.
/// - Array section: size of the whole base (no length and no lower bound),
///   size of one element (no colon), length * element size (explicit
///   length), or base size minus lower bound * element size, clamped to
///   zero (lower bound only).
/// - Anything else: size of the expression type, with reference types
///   replaced by the type they refer to.
llvm::Value *getExprTypeSize(const Expr *E) const {
  QualType ExprTy = E->getType().getCanonicalType();

  // Calculate the size for array shaping expression.
  if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
    llvm::Value *Size =
        CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OAE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                    CGF.getContext().getSizeType(),
                                    SE->getExprLoc());
      Size = CGF.Builder.CreateNUWMul(Size, Sz);
    }
    return Size;
  }

  // Reference types are ignored for mapping purposes.
  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();

  // Given that an array section is considered a built-in type, we need to
  // do the calculation based on the length of the section instead of relying
  // on CGF.getTypeSize(E->getType()).
  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
    QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                          OAE->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();

    // If there is no length associated with the expression and lower bound is
    // not specified too, that means we are using the whole length of the
    // base.
    if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
        !OAE->getLowerBound())
      return CGF.getTypeSize(BaseTy);

    llvm::Value *ElemSize;
    if (const auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
    } else {
      // NOTE(review): cast<> is documented to assert on a type mismatch
      // rather than return null, so the null check below looks redundant -
      // confirm before relying on it.
      const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
    }

    // If we don't have a length at this point, that is because we have an
    // array section with a single element.
    if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
      return ElemSize;

    if (const Expr *LenExpr = OAE->getLength()) {
      llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
      LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                           CGF.getContext().getSizeType(),
                                           LenExpr->getExprLoc());
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
           OAE->getLowerBound() && "expected array_section[lb:].");
    // Size = size of base type - lb * size of element type, or 0 when the
    // scaled lower bound is not smaller than the size of the base type (the
    // select below guards the no-unsigned-wrap subtraction).
    llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
    llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
    LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                     CGF.getContext().getSizeType(),
                                     OAE->getLowerBound()->getExprLoc());
    LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
    llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
    llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
    LengthVal = CGF.Builder.CreateSelect(
        Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
    return LengthVal;
  }
  return CGF.getTypeSize(ExprTy);
}

/// Return the corresponding bits for a given map clause modifier. Add
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as the first one of a series of maps that relate to the same map
/// expression.
///
/// \param MapType map type of the clause; OMPC_MAP_unknown is not expected.
/// \param MapModifiers modifiers searched for 'always', 'close' and
/// 'present'.
/// \param IsImplicit if true, the result starts from OMP_MAP_IMPLICIT.
/// \param AddPtrFlag if true, OMP_MAP_PTR_AND_OBJ is added.
/// \param AddIsTargetParamFlag if true, OMP_MAP_TARGET_PARAM is added.
OpenMPOffloadMappingFlags getMapTypeBits(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
  OpenMPOffloadMappingFlags Bits =
      IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
  switch (MapType) {
  case OMPC_MAP_alloc:
  case OMPC_MAP_release:
    // alloc and release is the default behavior in the runtime library, i.e.
    // if we don't pass any bits alloc/release that is what the runtime is
    // going to do. Therefore, we don't need to signal anything for these two
    // type modifiers.
    break;
  case OMPC_MAP_to:
    Bits |= OMP_MAP_TO;
    break;
  case OMPC_MAP_from:
    Bits |= OMP_MAP_FROM;
    break;
  case OMPC_MAP_tofrom:
    Bits |= OMP_MAP_TO | OMP_MAP_FROM;
    break;
  case OMPC_MAP_delete:
    Bits |= OMP_MAP_DELETE;
    break;
  case OMPC_MAP_unknown:
    llvm_unreachable("Unexpected map type!");
  }
  if (AddPtrFlag)
    Bits |= OMP_MAP_PTR_AND_OBJ;
  if (AddIsTargetParamFlag)
    Bits |= OMP_MAP_TARGET_PARAM;
  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
      != MapModifiers.end())
    Bits |= OMP_MAP_ALWAYS;
  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
      != MapModifiers.end())
    Bits |= OMP_MAP_CLOSE;
  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
      != MapModifiers.end())
    Bits |= OMP_MAP_PRESENT;
  return Bits;
}

/// Return true if the provided expression is a final array section. A
/// final array section, is one whose length can't be proved to be one.
bool isFinalArraySectionExpression(const Expr *E) const {
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

  // It is not an array section and therefore not a unity-size one.
  if (!OASE)
    return false;

  // An array section with no colon always refers to a single element.
  if (OASE->getColonLocFirst().isInvalid())
    return false;

  const Expr *Length = OASE->getLength();

  // If we don't have a length we have to check if the array has size 1
  // for this dimension. Also, we should always expect a length if the
  // base type is pointer.
  if (!Length) {
    QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                           OASE->getBase()->IgnoreParenImpCasts())
                           .getCanonicalType();
    if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
      return ATy->getSize().getSExtValue() != 1;
    // If we don't have a constant dimension length, we have to consider
    // the current section as having any size, so it is not necessarily
    // unitary. If it happens to be unity size, that is the user's fault.
    return true;
  }

  // Check if the length evaluates to 1.
  Expr::EvalResult Result;
  if (!Length->EvaluateAsInt(Result, CGF.getContext()))
    return true; // Can have more than size 1.

  llvm::APSInt ConstLength = Result.Val.getInt();
  return ConstLength.getSExtValue() != 1;
}

/// Generate the base pointers, section pointers, sizes, map type bits, and
/// user-defined mappers (all included in \a CombinedInfo) for the provided
/// map type, map modifier, and expression components. \a IsFirstComponentList
/// should be set to true if the provided set of components is the first
/// associated with a capture.
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
    bool IsFirstComponentList, bool IsImplicit,
    const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  // for data directives
  // p, p, sizeof(float*), TARGET_PARAM | TO | FROM
  // p, &p[1], 24*sizeof(float), PTR_AND_OBJ | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  Address BP = Address::invalid();
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

      // For non-data directives, we do not need to generate individual map
      // information for the pointer, it can be associated with the combined
      // storage.
      if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
          !CurDir.is<const OMPExecutableDirective *>() ||
          !isOpenMPTargetDataManagementDirective(
              CurDir.get<const OMPExecutableDirective *>()
                  ->getDirectiveKind()))
        ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME)
        ShouldBeMemberOf = true;
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section, is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // Note: these per-component OASE/OAShE intentionally shadow the
    // function-level variables of the same name computed for the first
    // component.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    bool IsNonDerefPointer = IsPointer && !UO && !BO;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointerOrAddr =
          (IsPointer || ForDeviceAddr) && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty()) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(Next == CE &&
               "Expected last element for the overlapped elements.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last unit of the object: LB viewed as a void
        // pointer, advanced by (size of the type - 1).
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false);
        LB = BP;
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        // Each iteration emits the gap between the current LB and the start
        // of the next overlapped component, then moves LB just past that
        // component.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          // NOTE(review): if no component in this list has an associated
          // declaration, ComponentLB stays invalid and Size keeps its
          // previous value (initially nullptr) - confirm callers guarantee
          // at least one.
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Emit the trailing piece: from the end of the last overlapped
        // component up to one past HB.
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.Types.push_back(Flags);
        CombinedInfo.Mappers.push_back(nullptr);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointerOrAddr) {
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

        // If Mapper is valid, the last component inherits the mapper.
        bool HasMapper = Mapper && Next == CE;
        CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference,
            IsCaptureFirstInfo && !RequiresReference);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        CombinedInfo.Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress(CGF);
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
          PartialStruct.Base = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
    }
  }
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
///
/// For a capture that is not a known firstprivate, TO | FROM is returned.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant variable captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointer-typed variable: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

/// Build the MEMBER_OF flag value for the entry at zero-based index
/// \a Position: (Position + 1) placed at bit getFlagMemberOffset().
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift left by getFlagMemberOffset() bits.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the MEMBER_OF field of \a Flags with \a MemberOfFlag, except for
/// PTR_AND_OBJ entries that do not carry the MEMBER_OF placeholder, which
/// are left untouched.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
      ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \a Layout the fields of \a RD, ordered by their LLVM struct
/// field number, recursing into non-empty base classes at the position they
/// occupy.
///
/// \param RD record to flatten; must not be a union.
/// \param Layout output list the fields are appended to.
/// \param AsBase if true, the base-subobject LLVM type of \a RD is used to
/// size the layout instead of its complete LLVM type.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; a slot stays null unless a base or a
  // field is recorded for that element below.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Keep whatever was already recorded for this slot.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Only non-bitfield, non-zero-size fields are recorded; bitfields and
    // zero-size fields are left out of the layout.
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Emit the slots in order, expanding each base into its own fields.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

public:
/// Constructor for an executable directive. Records the variables named in
/// firstprivate clauses, the implicit firstprivates coming from
/// uses_allocators clauses, and the component lists of is_device_ptr
/// clauses.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the traits variable when it is a plain declaration
      // reference; otherwise fall back to the allocator variable itself.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
///
/// \param CombinedInfo receives the combined entry (base pointer, pointer,
/// size, type and a null mapper).
/// \param CurTypes flags of the member entries; updated in place.
/// \param PartialStruct base and lowest/highest element of the struct.
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                       MapFlagsArrayTy &CurTypes,
                       const StructRangeInfoTy &PartialStruct) const {
  // Base is the base of the struct
  CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
  // Pointer is the address of the lowest element
  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
  CombinedInfo.Pointers.push_back(LB);
  // There should not be a mapper for a combined entry.
  CombinedInfo.Mappers.push_back(nullptr);
  // Size is (addr of {highest+1} element) - (addr of lowest element)
  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                /*isSigned=*/false);
  CombinedInfo.Sizes.push_back(Size);
  // Map type is always TARGET_PARAM
  CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
  // If any element has the present modifier, then make sure the runtime
  // doesn't attempt to allocate the struct.
  if (CurTypes.end() !=
      llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
        return Type & OMP_MAP_PRESENT;
      }))
    CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
  // Remove TARGET_PARAM flag from the first element
  // NOTE(review): begin() is dereferenced unconditionally, so this assumes
  // CurTypes is non-empty - confirm against callers.
  (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

  // All other current entries will be MEMBER_OF the combined entry
  // (except for PTR_AND_OBJ entries which do not have a placeholder value
  // 0xFFFF in the MEMBER_OF field).
  OpenMPOffloadMappingFlags MemberOfFlag =
      getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
  for (auto &M : CurTypes)
    setCorrectMemberOfFlag(M, MemberOfFlag);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates with a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
8003 void generateAllInfo( 8004 MapCombinedInfoTy &CombinedInfo, 8005 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8006 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8007 // We have to process the component lists that relate with the same 8008 // declaration in a single chunk so that we can generate the map flags 8009 // correctly. Therefore, we organize all lists in a map. 8010 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8011 8012 // Helper function to fill the information map for the different supported 8013 // clauses. 8014 auto &&InfoGen = 8015 [&Info, &SkipVarSet]( 8016 const ValueDecl *D, 8017 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8018 OpenMPMapClauseKind MapType, 8019 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8020 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8021 bool ForDeviceAddr = false) { 8022 const ValueDecl *VD = 8023 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8024 if (SkipVarSet.count(VD)) 8025 return; 8026 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8027 IsImplicit, Mapper, ForDeviceAddr); 8028 }; 8029 8030 assert(CurDir.is<const OMPExecutableDirective *>() && 8031 "Expect a executable directive"); 8032 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8033 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 8034 for (const auto L : C->component_lists()) { 8035 InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(), 8036 C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, 8037 C->isImplicit(), std::get<2>(L)); 8038 } 8039 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 8040 for (const auto L : C->component_lists()) { 8041 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, 8042 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L)); 8043 } 8044 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8045 for (const auto L 
: C->component_lists()) { 8046 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, 8047 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L)); 8048 } 8049 8050 // Look at the use_device_ptr clause information and mark the existing map 8051 // entries as such. If there is no map information for an entry in the 8052 // use_device_ptr list, we create one with map type 'alloc' and zero size 8053 // section. It is the user fault if that was not mapped before. If there is 8054 // no map information and the pointer is a struct member, then we defer the 8055 // emission of that entry until the whole struct has been processed. 8056 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8057 DeferredInfo; 8058 8059 for (const auto *C : 8060 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8061 for (const auto L : C->component_lists()) { 8062 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8063 std::get<1>(L); 8064 assert(!Components.empty() && 8065 "Not expecting empty list of components!"); 8066 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8067 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8068 const Expr *IE = Components.back().getAssociatedExpression(); 8069 // If the first component is a member expression, we have to look into 8070 // 'this', which maps to null in the map of map information. Otherwise 8071 // look directly for the information. 8072 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8073 8074 // We potentially have map information for this declaration already. 8075 // Look for the first set of components that refer to it. 8076 if (It != Info.end()) { 8077 auto CI = std::find_if( 8078 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8079 return MI.Components.back().getAssociatedDeclaration() == VD; 8080 }); 8081 // If we found a map entry, signal that the pointer has to be returned 8082 // and move on to the next declaration. 
8083 if (CI != It->second.end()) { 8084 CI->ReturnDevicePointer = true; 8085 continue; 8086 } 8087 } 8088 8089 // We didn't find any match in our map information - generate a zero 8090 // size array section - if the pointer is a struct member we defer this 8091 // action until the whole struct has been processed. 8092 if (isa<MemberExpr>(IE)) { 8093 // Insert the pointer into Info to be processed by 8094 // generateInfoForComponentList. Because it is a member pointer 8095 // without a pointee, no entry will be generated for it, therefore 8096 // we need to generate one after the whole struct has been processed. 8097 // Nonetheless, generateInfoForComponentList must be called to take 8098 // the pointer into account for the calculation of the range of the 8099 // partial struct. 8100 InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, 8101 /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr); 8102 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8103 } else { 8104 llvm::Value *Ptr = 8105 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8106 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8107 CombinedInfo.Pointers.push_back(Ptr); 8108 CombinedInfo.Sizes.push_back( 8109 llvm::Constant::getNullValue(CGF.Int64Ty)); 8110 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | 8111 OMP_MAP_TARGET_PARAM); 8112 CombinedInfo.Mappers.push_back(nullptr); 8113 } 8114 } 8115 } 8116 8117 // Look at the use_device_addr clause information and mark the existing map 8118 // entries as such. If there is no map information for an entry in the 8119 // use_device_addr list, we create one with map type 'alloc' and zero size 8120 // section. It is the user fault if that was not mapped before. If there is 8121 // no map information and the pointer is a struct member, then we defer the 8122 // emission of that entry until the whole struct has been processed. 
8123 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8124 for (const auto *C : 8125 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { 8126 for (const auto L : C->component_lists()) { 8127 assert(!std::get<1>(L).empty() && 8128 "Not expecting empty list of components!"); 8129 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8130 if (!Processed.insert(VD).second) 8131 continue; 8132 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8133 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8134 // If the first component is a member expression, we have to look into 8135 // 'this', which maps to null in the map of map information. Otherwise 8136 // look directly for the information. 8137 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8138 8139 // We potentially have map information for this declaration already. 8140 // Look for the first set of components that refer to it. 8141 if (It != Info.end()) { 8142 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8143 return MI.Components.back().getAssociatedDeclaration() == VD; 8144 }); 8145 // If we found a map entry, signal that the pointer has to be returned 8146 // and move on to the next declaration. 8147 if (CI != It->second.end()) { 8148 CI->ReturnDevicePointer = true; 8149 continue; 8150 } 8151 } 8152 8153 // We didn't find any match in our map information - generate a zero 8154 // size array section - if the pointer is a struct member we defer this 8155 // action until the whole struct has been processed. 8156 if (isa<MemberExpr>(IE)) { 8157 // Insert the pointer into Info to be processed by 8158 // generateInfoForComponentList. Because it is a member pointer 8159 // without a pointee, no entry will be generated for it, therefore 8160 // we need to generate one after the whole struct has been processed. 
8161 // Nonetheless, generateInfoForComponentList must be called to take 8162 // the pointer into account for the calculation of the range of the 8163 // partial struct. 8164 InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8165 /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr, 8166 /*ForDeviceAddr=*/true); 8167 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8168 } else { 8169 llvm::Value *Ptr; 8170 if (IE->isGLValue()) 8171 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8172 else 8173 Ptr = CGF.EmitScalarExpr(IE); 8174 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8175 CombinedInfo.Pointers.push_back(Ptr); 8176 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8177 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8178 CombinedInfo.Mappers.push_back(nullptr); 8179 } 8180 } 8181 } 8182 8183 for (const auto &M : Info) { 8184 // We need to know when we generate information for the first component 8185 // associated with a capture, because the mapping flags depend on it. 8186 bool IsFirstComponentList = true; 8187 8188 // Temporary generated information. 8189 MapCombinedInfoTy CurInfo; 8190 StructRangeInfoTy PartialStruct; 8191 8192 for (const MapInfo &L : M.second) { 8193 assert(!L.Components.empty() && 8194 "Not expecting declaration with no component lists."); 8195 8196 // Remember the current base pointer index. 8197 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8198 generateInfoForComponentList( 8199 L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct, 8200 IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr); 8201 8202 // If this entry relates with a device pointer, set the relevant 8203 // declaration and add the 'return pointer' flag. 
8204 if (L.ReturnDevicePointer) { 8205 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8206 "Unexpected number of mapped base pointers."); 8207 8208 const ValueDecl *RelevantVD = 8209 L.Components.back().getAssociatedDeclaration(); 8210 assert(RelevantVD && 8211 "No relevant declaration related with device pointer??"); 8212 8213 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8214 RelevantVD); 8215 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8216 } 8217 IsFirstComponentList = false; 8218 } 8219 8220 // Append any pending zero-length pointers which are struct members and 8221 // used with use_device_ptr or use_device_addr. 8222 auto CI = DeferredInfo.find(M.first); 8223 if (CI != DeferredInfo.end()) { 8224 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8225 llvm::Value *BasePtr; 8226 llvm::Value *Ptr; 8227 if (L.ForDeviceAddr) { 8228 if (L.IE->isGLValue()) 8229 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8230 else 8231 Ptr = this->CGF.EmitScalarExpr(L.IE); 8232 BasePtr = Ptr; 8233 // Entry is RETURN_PARAM. Also, set the placeholder value 8234 // MEMBER_OF=FFFF so that the entry is later updated with the 8235 // correct value of MEMBER_OF. 8236 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8237 } else { 8238 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8239 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8240 L.IE->getExprLoc()); 8241 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8242 // value MEMBER_OF=FFFF so that the entry is later updated with the 8243 // correct value of MEMBER_OF. 
8244 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8245 OMP_MAP_MEMBER_OF); 8246 } 8247 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8248 CurInfo.Pointers.push_back(Ptr); 8249 CurInfo.Sizes.push_back( 8250 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8251 CurInfo.Mappers.push_back(nullptr); 8252 } 8253 } 8254 8255 // If there is an entry in PartialStruct it means we have a struct with 8256 // individual members mapped. Emit an extra combined entry. 8257 if (PartialStruct.Base.isValid()) 8258 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 8259 8260 // We need to append the results of this capture to what we already have. 8261 CombinedInfo.append(CurInfo); 8262 } 8263 } 8264 8265 /// Generate all the base pointers, section pointers, sizes, map types, and 8266 /// mappers for the extracted map clauses of user-defined mapper (all included 8267 /// in \a CombinedInfo). 8268 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8269 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8270 "Expect a declare mapper directive"); 8271 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8272 // We have to process the component lists that relate with the same 8273 // declaration in a single chunk so that we can generate the map flags 8274 // correctly. Therefore, we organize all lists in a map. 8275 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8276 8277 // Fill the information map for map clauses. 8278 for (const auto *C : CurMapperDir->clauselists()) { 8279 const auto *MC = cast<OMPMapClause>(C); 8280 for (const auto L : MC->component_lists()) { 8281 const ValueDecl *VD = 8282 std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl()) 8283 : nullptr; 8284 // Get the corresponding user-defined mapper. 
8285 Info[VD].emplace_back( 8286 std::get<1>(L), MC->getMapType(), MC->getMapTypeModifiers(), 8287 /*ReturnDevicePointer=*/false, MC->isImplicit(), std::get<2>(L)); 8288 } 8289 } 8290 8291 for (const auto &M : Info) { 8292 // We need to know when we generate information for the first component 8293 // associated with a capture, because the mapping flags depend on it. 8294 bool IsFirstComponentList = true; 8295 8296 // Temporary generated information. 8297 MapCombinedInfoTy CurInfo; 8298 StructRangeInfoTy PartialStruct; 8299 8300 for (const MapInfo &L : M.second) { 8301 assert(!L.Components.empty() && 8302 "Not expecting declaration with no component lists."); 8303 generateInfoForComponentList( 8304 L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct, 8305 IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr); 8306 IsFirstComponentList = false; 8307 } 8308 8309 // If there is an entry in PartialStruct it means we have a struct with 8310 // individual members mapped. Emit an extra combined entry. 8311 if (PartialStruct.Base.isValid()) 8312 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 8313 8314 // We need to append the results of this capture to what we already have. 8315 CombinedInfo.append(CurInfo); 8316 } 8317 } 8318 8319 /// Emit capture info for lambdas for variables captured by reference. 
8320 void generateInfoForLambdaCaptures( 8321 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8322 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8323 const auto *RD = VD->getType() 8324 .getCanonicalType() 8325 .getNonReferenceType() 8326 ->getAsCXXRecordDecl(); 8327 if (!RD || !RD->isLambda()) 8328 return; 8329 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8330 LValue VDLVal = CGF.MakeAddrLValue( 8331 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8332 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8333 FieldDecl *ThisCapture = nullptr; 8334 RD->getCaptureFields(Captures, ThisCapture); 8335 if (ThisCapture) { 8336 LValue ThisLVal = 8337 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8338 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8339 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8340 VDLVal.getPointer(CGF)); 8341 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8342 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8343 CombinedInfo.Sizes.push_back( 8344 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8345 CGF.Int64Ty, /*isSigned=*/true)); 8346 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8347 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8348 CombinedInfo.Mappers.push_back(nullptr); 8349 } 8350 for (const LambdaCapture &LC : RD->captures()) { 8351 if (!LC.capturesVariable()) 8352 continue; 8353 const VarDecl *VD = LC.getCapturedVar(); 8354 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8355 continue; 8356 auto It = Captures.find(VD); 8357 assert(It != Captures.end() && "Found lambda capture without field."); 8358 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8359 if (LC.getCaptureKind() == LCK_ByRef) { 8360 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8361 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8362 VDLVal.getPointer(CGF)); 8363 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8364 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8365 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8366 CGF.getTypeSize( 8367 VD->getType().getCanonicalType().getNonReferenceType()), 8368 CGF.Int64Ty, /*isSigned=*/true)); 8369 } else { 8370 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8371 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8372 VDLVal.getPointer(CGF)); 8373 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8374 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8375 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8376 } 8377 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8378 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8379 CombinedInfo.Mappers.push_back(nullptr); 8380 } 8381 } 8382 8383 /// Set correct indices for lambdas captures. 8384 void adjustMemberOfForLambdaCaptures( 8385 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8386 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8387 MapFlagsArrayTy &Types) const { 8388 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8389 // Set correct member_of idx for all implicit lambda captures. 
8390 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8391 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8392 continue; 8393 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8394 assert(BasePtr && "Unable to find base lambda address."); 8395 int TgtIdx = -1; 8396 for (unsigned J = I; J > 0; --J) { 8397 unsigned Idx = J - 1; 8398 if (Pointers[Idx] != BasePtr) 8399 continue; 8400 TgtIdx = Idx; 8401 break; 8402 } 8403 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8404 // All other current entries will be MEMBER_OF the combined entry 8405 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8406 // 0xFFFF in the MEMBER_OF field). 8407 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8408 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8409 } 8410 } 8411 8412 /// Generate the base pointers, section pointers, sizes, map types, and 8413 /// mappers associated to a given capture (all included in \a CombinedInfo). 8414 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8415 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8416 StructRangeInfoTy &PartialStruct) const { 8417 assert(!Cap->capturesVariableArrayType() && 8418 "Not expecting to generate map info for a variable array type!"); 8419 8420 // We need to know when we generating information for the first component 8421 const ValueDecl *VD = Cap->capturesThis() 8422 ? nullptr 8423 : Cap->getCapturedVar()->getCanonicalDecl(); 8424 8425 // If this declaration appears in a is_device_ptr clause we just have to 8426 // pass the pointer by value. If it is a reference to a declaration, we just 8427 // pass its value. 
8428 if (DevPointersMap.count(VD)) { 8429 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8430 CombinedInfo.Pointers.push_back(Arg); 8431 CombinedInfo.Sizes.push_back( 8432 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8433 CGF.Int64Ty, /*isSigned=*/true)); 8434 CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8435 CombinedInfo.Mappers.push_back(nullptr); 8436 return; 8437 } 8438 8439 using MapData = 8440 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8441 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8442 const ValueDecl *>; 8443 SmallVector<MapData, 4> DeclComponentLists; 8444 assert(CurDir.is<const OMPExecutableDirective *>() && 8445 "Expect a executable directive"); 8446 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8447 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8448 for (const auto L : C->decl_component_lists(VD)) { 8449 const ValueDecl *VDecl, *Mapper; 8450 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8451 std::tie(VDecl, Components, Mapper) = L; 8452 assert(VDecl == VD && "We got information for the wrong declaration??"); 8453 assert(!Components.empty() && 8454 "Not expecting declaration with no component lists."); 8455 DeclComponentLists.emplace_back(Components, C->getMapType(), 8456 C->getMapTypeModifiers(), 8457 C->isImplicit(), Mapper); 8458 } 8459 } 8460 8461 // Find overlapping elements (including the offset from the base element). 
8462 llvm::SmallDenseMap< 8463 const MapData *, 8464 llvm::SmallVector< 8465 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8466 4> 8467 OverlappedData; 8468 size_t Count = 0; 8469 for (const MapData &L : DeclComponentLists) { 8470 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8471 OpenMPMapClauseKind MapType; 8472 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8473 bool IsImplicit; 8474 const ValueDecl *Mapper; 8475 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8476 ++Count; 8477 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8478 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8479 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1; 8480 auto CI = Components.rbegin(); 8481 auto CE = Components.rend(); 8482 auto SI = Components1.rbegin(); 8483 auto SE = Components1.rend(); 8484 for (; CI != CE && SI != SE; ++CI, ++SI) { 8485 if (CI->getAssociatedExpression()->getStmtClass() != 8486 SI->getAssociatedExpression()->getStmtClass()) 8487 break; 8488 // Are we dealing with different variables/fields? 8489 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8490 break; 8491 } 8492 // Found overlapping if, at least for one component, reached the head of 8493 // the components list. 8494 if (CI == CE || SI == SE) { 8495 assert((CI != CE || SI != SE) && 8496 "Unexpected full match of the mapping components."); 8497 const MapData &BaseData = CI == CE ? L : L1; 8498 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8499 SI == SE ? Components : Components1; 8500 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8501 OverlappedElements.getSecond().push_back(SubData); 8502 } 8503 } 8504 } 8505 // Sort the overlapped elements for each item. 
8506 llvm::SmallVector<const FieldDecl *, 4> Layout; 8507 if (!OverlappedData.empty()) { 8508 if (const auto *CRD = 8509 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8510 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8511 else { 8512 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8513 Layout.append(RD->field_begin(), RD->field_end()); 8514 } 8515 } 8516 for (auto &Pair : OverlappedData) { 8517 llvm::sort( 8518 Pair.getSecond(), 8519 [&Layout]( 8520 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8521 OMPClauseMappableExprCommon::MappableExprComponentListRef 8522 Second) { 8523 auto CI = First.rbegin(); 8524 auto CE = First.rend(); 8525 auto SI = Second.rbegin(); 8526 auto SE = Second.rend(); 8527 for (; CI != CE && SI != SE; ++CI, ++SI) { 8528 if (CI->getAssociatedExpression()->getStmtClass() != 8529 SI->getAssociatedExpression()->getStmtClass()) 8530 break; 8531 // Are we dealing with different variables/fields? 8532 if (CI->getAssociatedDeclaration() != 8533 SI->getAssociatedDeclaration()) 8534 break; 8535 } 8536 8537 // Lists contain the same elements. 8538 if (CI == CE && SI == SE) 8539 return false; 8540 8541 // List with less elements is less than list with more elements. 8542 if (CI == CE || SI == SE) 8543 return CI == CE; 8544 8545 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8546 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8547 if (FD1->getParent() == FD2->getParent()) 8548 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8549 const auto It = 8550 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8551 return FD == FD1 || FD == FD2; 8552 }); 8553 return *It == FD1; 8554 }); 8555 } 8556 8557 // Associated with a capture, because the mapping flags depend on it. 8558 // Go through all of the elements with the overlapped elements. 
8559 for (const auto &Pair : OverlappedData) { 8560 const MapData &L = *Pair.getFirst(); 8561 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8562 OpenMPMapClauseKind MapType; 8563 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8564 bool IsImplicit; 8565 const ValueDecl *Mapper; 8566 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8567 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8568 OverlappedComponents = Pair.getSecond(); 8569 bool IsFirstComponentList = true; 8570 generateInfoForComponentList( 8571 MapType, MapModifiers, Components, CombinedInfo, PartialStruct, 8572 IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false, 8573 OverlappedComponents); 8574 } 8575 // Go through other elements without overlapped elements. 8576 bool IsFirstComponentList = OverlappedData.empty(); 8577 for (const MapData &L : DeclComponentLists) { 8578 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8579 OpenMPMapClauseKind MapType; 8580 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8581 bool IsImplicit; 8582 const ValueDecl *Mapper; 8583 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8584 auto It = OverlappedData.find(&L); 8585 if (It == OverlappedData.end()) 8586 generateInfoForComponentList(MapType, MapModifiers, Components, 8587 CombinedInfo, PartialStruct, 8588 IsFirstComponentList, IsImplicit, Mapper); 8589 IsFirstComponentList = false; 8590 } 8591 } 8592 8593 /// Generate the default map information for a given capture \a CI, 8594 /// record field declaration \a RI and captured value \a CV. 8595 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8596 const FieldDecl &RI, llvm::Value *CV, 8597 MapCombinedInfoTy &CombinedInfo) const { 8598 bool IsImplicit = true; 8599 // Do the default mapping. 
8600 if (CI.capturesThis()) { 8601 CombinedInfo.BasePointers.push_back(CV); 8602 CombinedInfo.Pointers.push_back(CV); 8603 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8604 CombinedInfo.Sizes.push_back( 8605 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8606 CGF.Int64Ty, /*isSigned=*/true)); 8607 // Default map type. 8608 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8609 } else if (CI.capturesVariableByCopy()) { 8610 CombinedInfo.BasePointers.push_back(CV); 8611 CombinedInfo.Pointers.push_back(CV); 8612 if (!RI.getType()->isAnyPointerType()) { 8613 // We have to signal to the runtime captures passed by value that are 8614 // not pointers. 8615 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 8616 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8617 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8618 } else { 8619 // Pointers are implicitly mapped with a zero size and no flags 8620 // (other than first map that is added for all implicit maps). 8621 CombinedInfo.Types.push_back(OMP_MAP_NONE); 8622 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8623 } 8624 const VarDecl *VD = CI.getCapturedVar(); 8625 auto I = FirstPrivateDecls.find(VD); 8626 if (I != FirstPrivateDecls.end()) 8627 IsImplicit = I->getSecond(); 8628 } else { 8629 assert(CI.capturesVariable() && "Expected captured reference."); 8630 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8631 QualType ElementType = PtrTy->getPointeeType(); 8632 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8633 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8634 // The default map type for a scalar/complex type is 'to' because by 8635 // default the value doesn't have to be retrieved. For an aggregate 8636 // type, the default is 'tofrom'. 
8637 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8638 const VarDecl *VD = CI.getCapturedVar(); 8639 auto I = FirstPrivateDecls.find(VD); 8640 if (I != FirstPrivateDecls.end() && 8641 VD->getType().isConstant(CGF.getContext())) { 8642 llvm::Constant *Addr = 8643 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8644 // Copy the value of the original variable to the new global copy. 8645 CGF.Builder.CreateMemCpy( 8646 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8647 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8648 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 8649 // Use new global variable as the base pointers. 8650 CombinedInfo.BasePointers.push_back(Addr); 8651 CombinedInfo.Pointers.push_back(Addr); 8652 } else { 8653 CombinedInfo.BasePointers.push_back(CV); 8654 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8655 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8656 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8657 AlignmentSource::Decl)); 8658 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 8659 } else { 8660 CombinedInfo.Pointers.push_back(CV); 8661 } 8662 } 8663 if (I != FirstPrivateDecls.end()) 8664 IsImplicit = I->getSecond(); 8665 } 8666 // Every default map produces a single argument which is a target parameter. 8667 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 8668 8669 // Add flag stating this is an implicit map. 8670 if (IsImplicit) 8671 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 8672 8673 // No user-defined mapper for default mapping. 8674 CombinedInfo.Mappers.push_back(nullptr); 8675 } 8676 }; 8677 } // anonymous namespace 8678 8679 /// Emit the arrays used to pass the captures and map information to the 8680 /// offloading runtime library. If there is no map or capture information, 8681 /// return nullptr by reference. 
8682 static void 8683 emitOffloadingArrays(CodeGenFunction &CGF, 8684 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8685 CGOpenMPRuntime::TargetDataInfo &Info) { 8686 CodeGenModule &CGM = CGF.CGM; 8687 ASTContext &Ctx = CGF.getContext(); 8688 8689 // Reset the array information. 8690 Info.clearArrayInfo(); 8691 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 8692 8693 if (Info.NumberOfPtrs) { 8694 // Detect if we have any capture size requiring runtime evaluation of the 8695 // size so that a constant array could be eventually used. 8696 bool hasRuntimeEvaluationCaptureSize = false; 8697 for (llvm::Value *S : CombinedInfo.Sizes) 8698 if (!isa<llvm::Constant>(S)) { 8699 hasRuntimeEvaluationCaptureSize = true; 8700 break; 8701 } 8702 8703 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8704 QualType PointerArrayType = Ctx.getConstantArrayType( 8705 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8706 /*IndexTypeQuals=*/0); 8707 8708 Info.BasePointersArray = 8709 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8710 Info.PointersArray = 8711 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8712 Address MappersArray = 8713 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 8714 Info.MappersArray = MappersArray.getPointer(); 8715 8716 // If we don't have any VLA types or other types that require runtime 8717 // evaluation, we can use a constant array for the map sizes, otherwise we 8718 // need to fill up the arrays as we do for the pointers. 
8719 QualType Int64Ty = 8720 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8721 if (hasRuntimeEvaluationCaptureSize) { 8722 QualType SizeArrayType = Ctx.getConstantArrayType( 8723 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8724 /*IndexTypeQuals=*/0); 8725 Info.SizesArray = 8726 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8727 } else { 8728 // We expect all the sizes to be constant, so we collect them to create 8729 // a constant array. 8730 SmallVector<llvm::Constant *, 16> ConstSizes; 8731 for (llvm::Value *S : CombinedInfo.Sizes) 8732 ConstSizes.push_back(cast<llvm::Constant>(S)); 8733 8734 auto *SizesArrayInit = llvm::ConstantArray::get( 8735 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8736 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8737 auto *SizesArrayGbl = new llvm::GlobalVariable( 8738 CGM.getModule(), SizesArrayInit->getType(), 8739 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8740 SizesArrayInit, Name); 8741 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8742 Info.SizesArray = SizesArrayGbl; 8743 } 8744 8745 // The map types are always constant so we don't need to generate code to 8746 // fill arrays. Instead, we create an array constant. 
8747 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 8748 llvm::copy(CombinedInfo.Types, Mapping.begin()); 8749 llvm::Constant *MapTypesArrayInit = 8750 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8751 std::string MaptypesName = 8752 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8753 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8754 CGM.getModule(), MapTypesArrayInit->getType(), 8755 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8756 MapTypesArrayInit, MaptypesName); 8757 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8758 Info.MapTypesArray = MapTypesArrayGbl; 8759 8760 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8761 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 8762 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8763 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8764 Info.BasePointersArray, 0, I); 8765 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8766 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8767 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8768 CGF.Builder.CreateStore(BPVal, BPAddr); 8769 8770 if (Info.requiresDevicePointerInfo()) 8771 if (const ValueDecl *DevVD = 8772 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 8773 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8774 8775 llvm::Value *PVal = CombinedInfo.Pointers[I]; 8776 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8777 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8778 Info.PointersArray, 0, I); 8779 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8780 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8781 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8782 CGF.Builder.CreateStore(PVal, PAddr); 8783 8784 if (hasRuntimeEvaluationCaptureSize) { 8785 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8786 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8787 Info.SizesArray, 8788 /*Idx0=*/0, 
8789 /*Idx1=*/I); 8790 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8791 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 8792 CGM.Int64Ty, 8793 /*isSigned=*/true), 8794 SAddr); 8795 } 8796 8797 // Fill up the mapper array. 8798 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 8799 if (CombinedInfo.Mappers[I]) { 8800 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 8801 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 8802 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 8803 Info.HasMapper = true; 8804 } 8805 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 8806 CGF.Builder.CreateStore(MFunc, MAddr); 8807 } 8808 } 8809 } 8810 8811 /// Emit the arguments to be passed to the runtime library based on the 8812 /// arrays of base pointers, pointers, sizes, map types, and mappers. 8813 static void emitOffloadingArraysArgument( 8814 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8815 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8816 llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, 8817 CGOpenMPRuntime::TargetDataInfo &Info) { 8818 CodeGenModule &CGM = CGF.CGM; 8819 if (Info.NumberOfPtrs) { 8820 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8821 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8822 Info.BasePointersArray, 8823 /*Idx0=*/0, /*Idx1=*/0); 8824 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8825 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8826 Info.PointersArray, 8827 /*Idx0=*/0, 8828 /*Idx1=*/0); 8829 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8830 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8831 /*Idx0=*/0, /*Idx1=*/0); 8832 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8833 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8834 Info.MapTypesArray, 8835 /*Idx0=*/0, 8836 /*Idx1=*/0); 8837 MappersArrayArg = 8838 
Info.HasMapper 8839 ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy) 8840 : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8841 } else { 8842 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8843 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8844 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8845 MapTypesArrayArg = 8846 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8847 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8848 } 8849 } 8850 8851 /// Check for inner distribute directive. 8852 static const OMPExecutableDirective * 8853 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8854 const auto *CS = D.getInnermostCapturedStmt(); 8855 const auto *Body = 8856 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8857 const Stmt *ChildStmt = 8858 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8859 8860 if (const auto *NestedDir = 8861 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8862 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8863 switch (D.getDirectiveKind()) { 8864 case OMPD_target: 8865 if (isOpenMPDistributeDirective(DKind)) 8866 return NestedDir; 8867 if (DKind == OMPD_teams) { 8868 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8869 /*IgnoreCaptured=*/true); 8870 if (!Body) 8871 return nullptr; 8872 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8873 if (const auto *NND = 8874 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8875 DKind = NND->getDirectiveKind(); 8876 if (isOpenMPDistributeDirective(DKind)) 8877 return NND; 8878 } 8879 } 8880 return nullptr; 8881 case OMPD_target_teams: 8882 if (isOpenMPDistributeDirective(DKind)) 8883 return NestedDir; 8884 return nullptr; 8885 case OMPD_target_parallel: 8886 case OMPD_target_simd: 8887 case OMPD_target_parallel_for: 8888 case OMPD_target_parallel_for_simd: 
8889 return nullptr; 8890 case OMPD_target_teams_distribute: 8891 case OMPD_target_teams_distribute_simd: 8892 case OMPD_target_teams_distribute_parallel_for: 8893 case OMPD_target_teams_distribute_parallel_for_simd: 8894 case OMPD_parallel: 8895 case OMPD_for: 8896 case OMPD_parallel_for: 8897 case OMPD_parallel_master: 8898 case OMPD_parallel_sections: 8899 case OMPD_for_simd: 8900 case OMPD_parallel_for_simd: 8901 case OMPD_cancel: 8902 case OMPD_cancellation_point: 8903 case OMPD_ordered: 8904 case OMPD_threadprivate: 8905 case OMPD_allocate: 8906 case OMPD_task: 8907 case OMPD_simd: 8908 case OMPD_sections: 8909 case OMPD_section: 8910 case OMPD_single: 8911 case OMPD_master: 8912 case OMPD_critical: 8913 case OMPD_taskyield: 8914 case OMPD_barrier: 8915 case OMPD_taskwait: 8916 case OMPD_taskgroup: 8917 case OMPD_atomic: 8918 case OMPD_flush: 8919 case OMPD_depobj: 8920 case OMPD_scan: 8921 case OMPD_teams: 8922 case OMPD_target_data: 8923 case OMPD_target_exit_data: 8924 case OMPD_target_enter_data: 8925 case OMPD_distribute: 8926 case OMPD_distribute_simd: 8927 case OMPD_distribute_parallel_for: 8928 case OMPD_distribute_parallel_for_simd: 8929 case OMPD_teams_distribute: 8930 case OMPD_teams_distribute_simd: 8931 case OMPD_teams_distribute_parallel_for: 8932 case OMPD_teams_distribute_parallel_for_simd: 8933 case OMPD_target_update: 8934 case OMPD_declare_simd: 8935 case OMPD_declare_variant: 8936 case OMPD_begin_declare_variant: 8937 case OMPD_end_declare_variant: 8938 case OMPD_declare_target: 8939 case OMPD_end_declare_target: 8940 case OMPD_declare_reduction: 8941 case OMPD_declare_mapper: 8942 case OMPD_taskloop: 8943 case OMPD_taskloop_simd: 8944 case OMPD_master_taskloop: 8945 case OMPD_master_taskloop_simd: 8946 case OMPD_parallel_master_taskloop: 8947 case OMPD_parallel_master_taskloop_simd: 8948 case OMPD_requires: 8949 case OMPD_unknown: 8950 default: 8951 llvm_unreachable("Unexpected directive."); 8952 } 8953 } 8954 8955 return nullptr; 8956 } 

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// NOTE(review): the sketch above tests 'size > 1', while
/// emitUDMapperArrayInitOrDel emits a signed 'size >= 1' comparison; confirm
/// which of the two is intended.
///
/// \param D The mapper declaration. Nothing is emitted if a function for it
/// is already recorded in UDMMap.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
/// under CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // The mapper function for this declaration was already emitted.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable the mapper's clauses refer to; it is privatized below so
  // that it names the array element currently being mapped.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The five parameters are, in order: handle, base, begin, size, type.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop the optnone attribute from the mapper function.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A separate CodeGenFunction is used for the body of the mapper function.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // PtrBegin is the address of the 'begin' parameter's local slot, viewed as
  // a pointer to element pointers; PtrEnd is PtrBegin advanced by Size.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the exit block, so control continues at the loop
  // header whether or not the initialization component is pushed.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range (begin == end) jumps straight to the exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop; it is the first incoming
  // edge of the loop PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ends one iteration; it becomes the back-edge
  // predecessor of the loop PHI. It is updated once per mapped component.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the position of the MEMBER_OF field so that it can
  // be added to a component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // Despite its name, IsMember is true when the MEMBER_OF bits are all
    // zero; in that case the combine block is skipped.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    // Only two incoming values are added to this PHI; the 4 passed to
    // CreatePHI is just the number of reserved operands.
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    // Neither TO nor FROM is set in the incoming map type: treat as alloc.
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // No explicit branch to EndBB is created here; presumably EmitBlock adds
    // the fall-through branch from FromBB (TODO confirm).
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four cases. The tofrom case arrives directly from ToElseBB
    // and keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // The same five arguments are used for a nested mapper call and for the
    // runtime push call.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Back edge of the loop: LastBB is BodyBB when there were no components,
  // otherwise the EndBB of the last component.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the finished function so later requests for this declaration
  // reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Also record the declaration under the function that requested it.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
If \a IsInit is 9250 /// true, and \a MapType indicates to not delete this array, array 9251 /// initialization code is generated. If \a IsInit is false, and \a MapType 9252 /// indicates to not this array, array deletion code is generated. 9253 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9254 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9255 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9256 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9257 StringRef Prefix = IsInit ? ".init" : ".del"; 9258 9259 // Evaluate if this is an array section. 9260 llvm::BasicBlock *IsDeleteBB = 9261 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9262 llvm::BasicBlock *BodyBB = 9263 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9264 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9265 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9266 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9267 9268 // Evaluate if we are going to delete this section. 9269 MapperCGF.EmitBlock(IsDeleteBB); 9270 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9271 MapType, 9272 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9273 llvm::Value *DeleteCond; 9274 if (IsInit) { 9275 DeleteCond = MapperCGF.Builder.CreateIsNull( 9276 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9277 } else { 9278 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9279 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9280 } 9281 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9282 9283 MapperCGF.EmitBlock(BodyBB); 9284 // Get the array size by multiplying element size and element number (i.e., \p 9285 // Size). 
9286 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9287 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9288 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9289 // memory allocation/deletion purpose only. 9290 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9291 MapType, 9292 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9293 MappableExprsHandler::OMP_MAP_FROM))); 9294 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9295 // data structure. 9296 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9297 MapperCGF.EmitRuntimeCall( 9298 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9299 OMPRTL___tgt_push_mapper_component), 9300 OffloadingArgs); 9301 } 9302 9303 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9304 const OMPDeclareMapperDecl *D) { 9305 auto I = UDMMap.find(D); 9306 if (I != UDMMap.end()) 9307 return I->second; 9308 emitUserDefinedMapper(D); 9309 return UDMMap.lookup(D); 9310 } 9311 9312 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9313 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9314 llvm::Value *DeviceID, 9315 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9316 const OMPLoopDirective &D)> 9317 SizeEmitter) { 9318 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9319 const OMPExecutableDirective *TD = &D; 9320 // Get nested teams distribute kind directive, if any. 
9321 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9322 TD = getNestedDistributeDirective(CGM.getContext(), D); 9323 if (!TD) 9324 return; 9325 const auto *LD = cast<OMPLoopDirective>(TD); 9326 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9327 PrePostActionTy &) { 9328 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9329 llvm::Value *Args[] = {DeviceID, NumIterations}; 9330 CGF.EmitRuntimeCall( 9331 OMPBuilder.getOrCreateRuntimeFunction( 9332 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9333 Args); 9334 } 9335 }; 9336 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9337 } 9338 9339 void CGOpenMPRuntime::emitTargetCall( 9340 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9341 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9342 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9343 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9344 const OMPLoopDirective &D)> 9345 SizeEmitter) { 9346 if (!CGF.HaveInsertPoint()) 9347 return; 9348 9349 assert(OutlinedFn && "Invalid outlined function!"); 9350 9351 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9352 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9353 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9354 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9355 PrePostActionTy &) { 9356 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9357 }; 9358 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9359 9360 CodeGenFunction::OMPTargetDataInfo InputInfo; 9361 llvm::Value *MapTypesArray = nullptr; 9362 // Fill up the pointer arrays and transfer execution to the device. 
9363 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9364 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9365 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9366 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9367 // Reverse offloading is not supported, so just execute on the host. 9368 if (RequiresOuterTask) { 9369 CapturedVars.clear(); 9370 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9371 } 9372 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9373 return; 9374 } 9375 9376 // On top of the arrays that were filled up, the target offloading call 9377 // takes as arguments the device id as well as the host pointer. The host 9378 // pointer is used by the runtime library to identify the current target 9379 // region, so it only has to be unique and not necessarily point to 9380 // anything. It could be the pointer to the outlined function that 9381 // implements the target region, but we aren't using that so that the 9382 // compiler doesn't need to keep that, and could therefore inline the host 9383 // function if proven worthwhile during optimization. 9384 9385 // From this point on, we need to have an ID of the target region defined. 9386 assert(OutlinedFnID && "Invalid outlined function ID!"); 9387 9388 // Emit device ID if any. 9389 llvm::Value *DeviceID; 9390 if (Device.getPointer()) { 9391 assert((Device.getInt() == OMPC_DEVICE_unknown || 9392 Device.getInt() == OMPC_DEVICE_device_num) && 9393 "Expected device_num modifier."); 9394 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9395 DeviceID = 9396 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9397 } else { 9398 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9399 } 9400 9401 // Emit the number of elements in the offloading arrays. 9402 llvm::Value *PointerNum = 9403 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9404 9405 // Return value of the runtime offloading call. 
9406 llvm::Value *Return; 9407 9408 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9409 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9410 9411 // Emit tripcount for the target loop-based directive. 9412 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9413 9414 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9415 // The target region is an outlined function launched by the runtime 9416 // via calls __tgt_target() or __tgt_target_teams(). 9417 // 9418 // __tgt_target() launches a target region with one team and one thread, 9419 // executing a serial region. This master thread may in turn launch 9420 // more threads within its team upon encountering a parallel region, 9421 // however, no additional teams can be launched on the device. 9422 // 9423 // __tgt_target_teams() launches a target region with one or more teams, 9424 // each with one or more threads. This call is required for target 9425 // constructs such as: 9426 // 'target teams' 9427 // 'target' / 'teams' 9428 // 'target teams distribute parallel for' 9429 // 'target parallel' 9430 // and so on. 9431 // 9432 // Note that on the host and CPU targets, the runtime implementation of 9433 // these calls simply call the outlined function without forking threads. 9434 // The outlined functions themselves have runtime calls to 9435 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9436 // the compiler in emitTeamsCall() and emitParallelCall(). 9437 // 9438 // In contrast, on the NVPTX target, the implementation of 9439 // __tgt_target_teams() launches a GPU kernel with the requested number 9440 // of teams and threads so no additional calls to the runtime are required. 9441 if (NumTeams) { 9442 // If we have NumTeams defined this means that we have an enclosed teams 9443 // region. Therefore we also expect to have NumThreads defined. 
These two 9444 // values should be defined in the presence of a teams directive, 9445 // regardless of having any clauses associated. If the user is using teams 9446 // but no clauses, these two values will be the default that should be 9447 // passed to the runtime library - a 32-bit integer with the value zero. 9448 assert(NumThreads && "Thread limit expression should be available along " 9449 "with number of teams."); 9450 llvm::Value *OffloadingArgs[] = {DeviceID, 9451 OutlinedFnID, 9452 PointerNum, 9453 InputInfo.BasePointersArray.getPointer(), 9454 InputInfo.PointersArray.getPointer(), 9455 InputInfo.SizesArray.getPointer(), 9456 MapTypesArray, 9457 InputInfo.MappersArray.getPointer(), 9458 NumTeams, 9459 NumThreads}; 9460 Return = CGF.EmitRuntimeCall( 9461 OMPBuilder.getOrCreateRuntimeFunction( 9462 CGM.getModule(), HasNowait 9463 ? OMPRTL___tgt_target_teams_nowait_mapper 9464 : OMPRTL___tgt_target_teams_mapper), 9465 OffloadingArgs); 9466 } else { 9467 llvm::Value *OffloadingArgs[] = {DeviceID, 9468 OutlinedFnID, 9469 PointerNum, 9470 InputInfo.BasePointersArray.getPointer(), 9471 InputInfo.PointersArray.getPointer(), 9472 InputInfo.SizesArray.getPointer(), 9473 MapTypesArray, 9474 InputInfo.MappersArray.getPointer()}; 9475 Return = CGF.EmitRuntimeCall( 9476 OMPBuilder.getOrCreateRuntimeFunction( 9477 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 9478 : OMPRTL___tgt_target_mapper), 9479 OffloadingArgs); 9480 } 9481 9482 // Check the error code and execute the host version if required. 
9483 llvm::BasicBlock *OffloadFailedBlock = 9484 CGF.createBasicBlock("omp_offload.failed"); 9485 llvm::BasicBlock *OffloadContBlock = 9486 CGF.createBasicBlock("omp_offload.cont"); 9487 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9488 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9489 9490 CGF.EmitBlock(OffloadFailedBlock); 9491 if (RequiresOuterTask) { 9492 CapturedVars.clear(); 9493 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9494 } 9495 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9496 CGF.EmitBranch(OffloadContBlock); 9497 9498 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9499 }; 9500 9501 // Notify that the host version must be executed. 9502 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9503 RequiresOuterTask](CodeGenFunction &CGF, 9504 PrePostActionTy &) { 9505 if (RequiresOuterTask) { 9506 CapturedVars.clear(); 9507 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9508 } 9509 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9510 }; 9511 9512 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9513 &CapturedVars, RequiresOuterTask, 9514 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9515 // Fill up the arrays with all the captured variables. 9516 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9517 9518 // Get mappable expression information. 
9519 MappableExprsHandler MEHandler(D, CGF); 9520 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9521 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9522 9523 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9524 auto CV = CapturedVars.begin(); 9525 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9526 CE = CS.capture_end(); 9527 CI != CE; ++CI, ++RI, ++CV) { 9528 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9529 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9530 9531 // VLA sizes are passed to the outlined region by copy and do not have map 9532 // information associated. 9533 if (CI->capturesVariableArrayType()) { 9534 CurInfo.BasePointers.push_back(*CV); 9535 CurInfo.Pointers.push_back(*CV); 9536 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9537 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9538 // Copy to the device as an argument. No need to retrieve it. 9539 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9540 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9541 MappableExprsHandler::OMP_MAP_IMPLICIT); 9542 CurInfo.Mappers.push_back(nullptr); 9543 } else { 9544 // If we have any information in the map clause, we use it, otherwise we 9545 // just do a default mapping. 9546 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 9547 if (!CI->capturesThis()) 9548 MappedVarSet.insert(CI->getCapturedVar()); 9549 else 9550 MappedVarSet.insert(nullptr); 9551 if (CurInfo.BasePointers.empty()) 9552 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 9553 // Generate correct mapping for variables captured by reference in 9554 // lambdas. 9555 if (CI->capturesVariable()) 9556 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 9557 CurInfo, LambdaPointers); 9558 } 9559 // We expect to have at least an element of information for this capture. 
9560 assert(!CurInfo.BasePointers.empty() && 9561 "Non-existing map pointer for capture!"); 9562 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9563 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9564 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9565 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9566 "Inconsistent map information sizes!"); 9567 9568 // If there is an entry in PartialStruct it means we have a struct with 9569 // individual members mapped. Emit an extra combined entry. 9570 if (PartialStruct.Base.isValid()) 9571 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 9572 9573 // We need to append the results of this capture to what we already have. 9574 CombinedInfo.append(CurInfo); 9575 } 9576 // Adjust MEMBER_OF flags for the lambdas captures. 9577 MEHandler.adjustMemberOfForLambdaCaptures( 9578 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 9579 CombinedInfo.Types); 9580 // Map any list items in a map clause that were not captures because they 9581 // weren't referenced within the construct. 9582 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 9583 9584 TargetDataInfo Info; 9585 // Fill up the arrays and create the arguments. 
9586 emitOffloadingArrays(CGF, CombinedInfo, Info); 9587 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9588 Info.PointersArray, Info.SizesArray, 9589 Info.MapTypesArray, Info.MappersArray, Info); 9590 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9591 InputInfo.BasePointersArray = 9592 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9593 InputInfo.PointersArray = 9594 Address(Info.PointersArray, CGM.getPointerAlign()); 9595 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9596 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 9597 MapTypesArray = Info.MapTypesArray; 9598 if (RequiresOuterTask) 9599 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9600 else 9601 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9602 }; 9603 9604 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9605 CodeGenFunction &CGF, PrePostActionTy &) { 9606 if (RequiresOuterTask) { 9607 CodeGenFunction::OMPTargetDataInfo InputInfo; 9608 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9609 } else { 9610 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9611 } 9612 }; 9613 9614 // If we have a target function ID it means that we need to support 9615 // offloading, otherwise, just execute on the host. We need to execute on host 9616 // regardless of the conditional in the if clause if, e.g., the user do not 9617 // specify target triples. 9618 if (OutlinedFnID) { 9619 if (IfCond) { 9620 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9621 } else { 9622 RegionCodeGenTy ThenRCG(TargetThenGen); 9623 ThenRCG(CGF); 9624 } 9625 } else { 9626 RegionCodeGenTy ElseRCG(TargetElseGen); 9627 ElseRCG(CGF); 9628 } 9629 } 9630 9631 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9632 StringRef ParentName) { 9633 if (!S) 9634 return; 9635 9636 // Codegen OMP target directives that offload compute to the device. 
9637 bool RequiresDeviceCodegen = 9638 isa<OMPExecutableDirective>(S) && 9639 isOpenMPTargetExecutionDirective( 9640 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9641 9642 if (RequiresDeviceCodegen) { 9643 const auto &E = *cast<OMPExecutableDirective>(S); 9644 unsigned DeviceID; 9645 unsigned FileID; 9646 unsigned Line; 9647 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9648 FileID, Line); 9649 9650 // Is this a target region that should not be emitted as an entry point? If 9651 // so just signal we are done with this target region. 9652 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9653 ParentName, Line)) 9654 return; 9655 9656 switch (E.getDirectiveKind()) { 9657 case OMPD_target: 9658 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9659 cast<OMPTargetDirective>(E)); 9660 break; 9661 case OMPD_target_parallel: 9662 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9663 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9664 break; 9665 case OMPD_target_teams: 9666 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9667 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9668 break; 9669 case OMPD_target_teams_distribute: 9670 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9671 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9672 break; 9673 case OMPD_target_teams_distribute_simd: 9674 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9675 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9676 break; 9677 case OMPD_target_parallel_for: 9678 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9679 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9680 break; 9681 case OMPD_target_parallel_for_simd: 9682 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9683 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9684 break; 9685 case OMPD_target_simd: 9686 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9687 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9688 break; 9689 case OMPD_target_teams_distribute_parallel_for: 9690 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9691 CGM, ParentName, 9692 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9693 break; 9694 case OMPD_target_teams_distribute_parallel_for_simd: 9695 CodeGenFunction:: 9696 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9697 CGM, ParentName, 9698 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9699 break; 9700 case OMPD_parallel: 9701 case OMPD_for: 9702 case OMPD_parallel_for: 9703 case OMPD_parallel_master: 9704 case OMPD_parallel_sections: 9705 case OMPD_for_simd: 9706 case OMPD_parallel_for_simd: 9707 case OMPD_cancel: 9708 case OMPD_cancellation_point: 9709 case OMPD_ordered: 9710 case OMPD_threadprivate: 9711 case OMPD_allocate: 9712 case OMPD_task: 9713 case OMPD_simd: 9714 case OMPD_sections: 9715 case OMPD_section: 9716 case OMPD_single: 9717 case OMPD_master: 9718 case OMPD_critical: 9719 case OMPD_taskyield: 9720 case OMPD_barrier: 9721 case OMPD_taskwait: 9722 case OMPD_taskgroup: 9723 case OMPD_atomic: 9724 case OMPD_flush: 9725 case OMPD_depobj: 9726 case OMPD_scan: 9727 case OMPD_teams: 9728 case OMPD_target_data: 9729 case OMPD_target_exit_data: 9730 case OMPD_target_enter_data: 9731 case OMPD_distribute: 9732 case OMPD_distribute_simd: 9733 case OMPD_distribute_parallel_for: 9734 case OMPD_distribute_parallel_for_simd: 9735 case OMPD_teams_distribute: 9736 case OMPD_teams_distribute_simd: 9737 case OMPD_teams_distribute_parallel_for: 9738 case OMPD_teams_distribute_parallel_for_simd: 9739 case OMPD_target_update: 9740 case OMPD_declare_simd: 9741 case OMPD_declare_variant: 9742 case OMPD_begin_declare_variant: 9743 case OMPD_end_declare_variant: 9744 case OMPD_declare_target: 9745 case OMPD_end_declare_target: 9746 case OMPD_declare_reduction: 9747 case OMPD_declare_mapper: 
9748 case OMPD_taskloop: 9749 case OMPD_taskloop_simd: 9750 case OMPD_master_taskloop: 9751 case OMPD_master_taskloop_simd: 9752 case OMPD_parallel_master_taskloop: 9753 case OMPD_parallel_master_taskloop_simd: 9754 case OMPD_requires: 9755 case OMPD_unknown: 9756 default: 9757 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9758 } 9759 return; 9760 } 9761 9762 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9763 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9764 return; 9765 9766 scanForTargetRegionsFunctions( 9767 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9768 return; 9769 } 9770 9771 // If this is a lambda function, look into its body. 9772 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9773 S = L->getBody(); 9774 9775 // Keep looking for target regions recursively. 9776 for (const Stmt *II : S->children()) 9777 scanForTargetRegionsFunctions(II, ParentName); 9778 } 9779 9780 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9781 // If emitting code for the host, we do not process FD here. Instead we do 9782 // the normal code generation. 9783 if (!CGM.getLangOpts().OpenMPIsDevice) { 9784 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9785 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9786 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9787 // Do not emit device_type(nohost) functions for the host. 9788 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9789 return true; 9790 } 9791 return false; 9792 } 9793 9794 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9795 // Try to detect target regions in the function. 9796 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9797 StringRef Name = CGM.getMangledName(GD); 9798 scanForTargetRegionsFunctions(FD->getBody(), Name); 9799 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9800 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9801 // Do not emit device_type(nohost) functions for the host. 
9802 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9803 return true; 9804 } 9805 9806 // Do not to emit function if it is not marked as declare target. 9807 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9808 AlreadyEmittedTargetDecls.count(VD) == 0; 9809 } 9810 9811 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9812 if (!CGM.getLangOpts().OpenMPIsDevice) 9813 return false; 9814 9815 // Check if there are Ctors/Dtors in this declaration and look for target 9816 // regions in it. We use the complete variant to produce the kernel name 9817 // mangling. 9818 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9819 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9820 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9821 StringRef ParentName = 9822 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9823 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9824 } 9825 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9826 StringRef ParentName = 9827 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9828 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9829 } 9830 } 9831 9832 // Do not to emit variable if it is not marked as declare target. 
9833 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9834 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9835 cast<VarDecl>(GD.getDecl())); 9836 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9837 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9838 HasRequiresUnifiedSharedMemory)) { 9839 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9840 return true; 9841 } 9842 return false; 9843 } 9844 9845 llvm::Constant * 9846 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9847 const VarDecl *VD) { 9848 assert(VD->getType().isConstant(CGM.getContext()) && 9849 "Expected constant variable."); 9850 StringRef VarName; 9851 llvm::Constant *Addr; 9852 llvm::GlobalValue::LinkageTypes Linkage; 9853 QualType Ty = VD->getType(); 9854 SmallString<128> Buffer; 9855 { 9856 unsigned DeviceID; 9857 unsigned FileID; 9858 unsigned Line; 9859 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9860 FileID, Line); 9861 llvm::raw_svector_ostream OS(Buffer); 9862 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9863 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9864 VarName = OS.str(); 9865 } 9866 Linkage = llvm::GlobalValue::InternalLinkage; 9867 Addr = 9868 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9869 getDefaultFirstprivateAddressSpace()); 9870 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9871 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9872 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9873 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9874 VarName, Addr, VarSize, 9875 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9876 return Addr; 9877 } 9878 9879 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9880 llvm::Constant *Addr) { 9881 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9882 !CGM.getLangOpts().OpenMPIsDevice) 9883 return; 9884 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9885 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9886 if (!Res) { 9887 if (CGM.getLangOpts().OpenMPIsDevice) { 9888 // Register non-target variables being emitted in device code (debug info 9889 // may cause this). 9890 StringRef VarName = CGM.getMangledName(VD); 9891 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9892 } 9893 return; 9894 } 9895 // Register declare target variables. 9896 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9897 StringRef VarName; 9898 CharUnits VarSize; 9899 llvm::GlobalValue::LinkageTypes Linkage; 9900 9901 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9902 !HasRequiresUnifiedSharedMemory) { 9903 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9904 VarName = CGM.getMangledName(VD); 9905 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9906 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9907 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9908 } else { 9909 VarSize = CharUnits::Zero(); 9910 } 9911 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9912 // Temp solution to prevent optimizations of the internal variables. 
9913 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9914 std::string RefName = getName({VarName, "ref"}); 9915 if (!CGM.GetGlobalValue(RefName)) { 9916 llvm::Constant *AddrRef = 9917 getOrCreateInternalVariable(Addr->getType(), RefName); 9918 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9919 GVAddrRef->setConstant(/*Val=*/true); 9920 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9921 GVAddrRef->setInitializer(Addr); 9922 CGM.addCompilerUsedGlobal(GVAddrRef); 9923 } 9924 } 9925 } else { 9926 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9927 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9928 HasRequiresUnifiedSharedMemory)) && 9929 "Declare target attribute must link or to with unified memory."); 9930 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9931 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9932 else 9933 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9934 9935 if (CGM.getLangOpts().OpenMPIsDevice) { 9936 VarName = Addr->getName(); 9937 Addr = nullptr; 9938 } else { 9939 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9940 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9941 } 9942 VarSize = CGM.getPointerSize(); 9943 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9944 } 9945 9946 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9947 VarName, Addr, VarSize, Flags, Linkage); 9948 } 9949 9950 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9951 if (isa<FunctionDecl>(GD.getDecl()) || 9952 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9953 return emitTargetFunctions(GD); 9954 9955 return emitTargetGlobalVariable(GD); 9956 } 9957 9958 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9959 for (const VarDecl *VD : DeferredGlobalVariables) { 9960 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9961 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9962 if (!Res) 9963 continue; 9964 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9965 !HasRequiresUnifiedSharedMemory) { 9966 CGM.EmitGlobal(VD); 9967 } else { 9968 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9969 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9970 HasRequiresUnifiedSharedMemory)) && 9971 "Expected link clause or to clause with unified memory."); 9972 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9973 } 9974 } 9975 } 9976 9977 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9978 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9979 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9980 " Expected target-based directive."); 9981 } 9982 9983 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9984 for (const OMPClause *Clause : D->clauselists()) { 9985 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9986 HasRequiresUnifiedSharedMemory = true; 9987 } else if (const auto *AC = 9988 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9989 switch (AC->getAtomicDefaultMemOrderKind()) { 9990 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9991 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9992 break; 9993 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9994 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9995 break; 9996 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9997 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9998 break; 9999 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10000 break; 10001 } 10002 } 10003 } 10004 } 10005 10006 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10007 return RequiresAtomicOrdering; 10008 } 10009 10010 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10011 LangAS &AS) { 10012 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10013 return false; 10014 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10015 switch(A->getAllocatorType()) { 10016 case OMPAllocateDeclAttr::OMPNullMemAlloc: 
10017 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10018 // Not supported, fallback to the default mem space. 10019 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10020 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10021 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10022 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10023 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10024 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10025 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10026 AS = LangAS::Default; 10027 return true; 10028 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10029 llvm_unreachable("Expected predefined allocator for the variables with the " 10030 "static storage."); 10031 } 10032 return false; 10033 } 10034 10035 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10036 return HasRequiresUnifiedSharedMemory; 10037 } 10038 10039 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10040 CodeGenModule &CGM) 10041 : CGM(CGM) { 10042 if (CGM.getLangOpts().OpenMPIsDevice) { 10043 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10044 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10045 } 10046 } 10047 10048 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10049 if (CGM.getLangOpts().OpenMPIsDevice) 10050 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10051 } 10052 10053 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10054 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10055 return true; 10056 10057 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10058 // Do not to emit function if it is marked as declare target as it was already 10059 // emitted. 
10060 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10061 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10062 if (auto *F = dyn_cast_or_null<llvm::Function>( 10063 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10064 return !F->isDeclaration(); 10065 return false; 10066 } 10067 return true; 10068 } 10069 10070 return !AlreadyEmittedTargetDecls.insert(D).second; 10071 } 10072 10073 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10074 // If we don't have entries or if we are emitting code for the device, we 10075 // don't need to do anything. 10076 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10077 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10078 (OffloadEntriesInfoManager.empty() && 10079 !HasEmittedDeclareTargetRegion && 10080 !HasEmittedTargetRegion)) 10081 return nullptr; 10082 10083 // Create and register the function that handles the requires directives. 10084 ASTContext &C = CGM.getContext(); 10085 10086 llvm::Function *RequiresRegFn; 10087 { 10088 CodeGenFunction CGF(CGM); 10089 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10090 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10091 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10092 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10093 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10094 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10095 // TODO: check for other requires clauses. 10096 // The requires directive takes effect only when a target region is 10097 // present in the compilation unit. Otherwise it is ignored and not 10098 // passed to the runtime. This avoids the runtime from throwing an error 10099 // for mismatching requires clauses across compilation units that don't 10100 // contain at least 1 target region. 
10101 assert((HasEmittedTargetRegion || 10102 HasEmittedDeclareTargetRegion || 10103 !OffloadEntriesInfoManager.empty()) && 10104 "Target or declare target region expected."); 10105 if (HasRequiresUnifiedSharedMemory) 10106 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10107 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10108 CGM.getModule(), OMPRTL___tgt_register_requires), 10109 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10110 CGF.FinishFunction(); 10111 } 10112 return RequiresRegFn; 10113 } 10114 10115 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10116 const OMPExecutableDirective &D, 10117 SourceLocation Loc, 10118 llvm::Function *OutlinedFn, 10119 ArrayRef<llvm::Value *> CapturedVars) { 10120 if (!CGF.HaveInsertPoint()) 10121 return; 10122 10123 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10124 CodeGenFunction::RunCleanupsScope Scope(CGF); 10125 10126 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10127 llvm::Value *Args[] = { 10128 RTLoc, 10129 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10130 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10131 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10132 RealArgs.append(std::begin(Args), std::end(Args)); 10133 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10134 10135 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10136 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10137 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10138 } 10139 10140 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10141 const Expr *NumTeams, 10142 const Expr *ThreadLimit, 10143 SourceLocation Loc) { 10144 if (!CGF.HaveInsertPoint()) 10145 return; 10146 10147 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10148 10149 llvm::Value *NumTeamsVal = 10150 NumTeams 10151 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10152 CGF.CGM.Int32Ty, /* isSigned = */ true) 10153 : CGF.Builder.getInt32(0); 10154 10155 llvm::Value *ThreadLimitVal = 10156 ThreadLimit 10157 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10158 CGF.CGM.Int32Ty, /* isSigned = */ true) 10159 : CGF.Builder.getInt32(0); 10160 10161 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10162 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10163 ThreadLimitVal}; 10164 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10165 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10166 PushNumTeamsArgs); 10167 } 10168 10169 void CGOpenMPRuntime::emitTargetDataCalls( 10170 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10171 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10172 if (!CGF.HaveInsertPoint()) 10173 return; 10174 10175 // Action used to replace the default codegen action and turn privatization 10176 // off. 10177 PrePostActionTy NoPrivAction; 10178 10179 // Generate the code for the opening of the data environment. Capture all the 10180 // arguments of the runtime call by reference because they are used in the 10181 // closing of the region. 10182 auto &&BeginThenGen = [this, &D, Device, &Info, 10183 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10184 // Fill up the arrays with all the mapped variables. 10185 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10186 10187 // Get map clause information. 10188 MappableExprsHandler MEHandler(D, CGF); 10189 MEHandler.generateAllInfo(CombinedInfo); 10190 10191 // Fill up the arrays and create the arguments. 
10192 emitOffloadingArrays(CGF, CombinedInfo, Info); 10193 10194 llvm::Value *BasePointersArrayArg = nullptr; 10195 llvm::Value *PointersArrayArg = nullptr; 10196 llvm::Value *SizesArrayArg = nullptr; 10197 llvm::Value *MapTypesArrayArg = nullptr; 10198 llvm::Value *MappersArrayArg = nullptr; 10199 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10200 SizesArrayArg, MapTypesArrayArg, 10201 MappersArrayArg, Info); 10202 10203 // Emit device ID if any. 10204 llvm::Value *DeviceID = nullptr; 10205 if (Device) { 10206 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10207 CGF.Int64Ty, /*isSigned=*/true); 10208 } else { 10209 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10210 } 10211 10212 // Emit the number of elements in the offloading arrays. 10213 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10214 10215 llvm::Value *OffloadingArgs[] = { 10216 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10217 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10218 CGF.EmitRuntimeCall( 10219 OMPBuilder.getOrCreateRuntimeFunction( 10220 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10221 OffloadingArgs); 10222 10223 // If device pointer privatization is required, emit the body of the region 10224 // here. It will have to be duplicated: with and without privatization. 10225 if (!Info.CaptureDeviceAddrMap.empty()) 10226 CodeGen(CGF); 10227 }; 10228 10229 // Generate code for the closing of the data region. 
10230 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10231 PrePostActionTy &) { 10232 assert(Info.isValid() && "Invalid data environment closing arguments."); 10233 10234 llvm::Value *BasePointersArrayArg = nullptr; 10235 llvm::Value *PointersArrayArg = nullptr; 10236 llvm::Value *SizesArrayArg = nullptr; 10237 llvm::Value *MapTypesArrayArg = nullptr; 10238 llvm::Value *MappersArrayArg = nullptr; 10239 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10240 SizesArrayArg, MapTypesArrayArg, 10241 MappersArrayArg, Info); 10242 10243 // Emit device ID if any. 10244 llvm::Value *DeviceID = nullptr; 10245 if (Device) { 10246 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10247 CGF.Int64Ty, /*isSigned=*/true); 10248 } else { 10249 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10250 } 10251 10252 // Emit the number of elements in the offloading arrays. 10253 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10254 10255 llvm::Value *OffloadingArgs[] = { 10256 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10257 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10258 CGF.EmitRuntimeCall( 10259 OMPBuilder.getOrCreateRuntimeFunction( 10260 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10261 OffloadingArgs); 10262 }; 10263 10264 // If we need device pointer privatization, we need to emit the body of the 10265 // region with no privatization in the 'else' branch of the conditional. 10266 // Otherwise, we don't have to do anything. 10267 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10268 PrePostActionTy &) { 10269 if (!Info.CaptureDeviceAddrMap.empty()) { 10270 CodeGen.setAction(NoPrivAction); 10271 CodeGen(CGF); 10272 } 10273 }; 10274 10275 // We don't have to do anything to close the region if the if clause evaluates 10276 // to false. 
10277 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10278 10279 if (IfCond) { 10280 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10281 } else { 10282 RegionCodeGenTy RCG(BeginThenGen); 10283 RCG(CGF); 10284 } 10285 10286 // If we don't require privatization of device pointers, we emit the body in 10287 // between the runtime calls. This avoids duplicating the body code. 10288 if (Info.CaptureDeviceAddrMap.empty()) { 10289 CodeGen.setAction(NoPrivAction); 10290 CodeGen(CGF); 10291 } 10292 10293 if (IfCond) { 10294 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10295 } else { 10296 RegionCodeGenTy RCG(EndThenGen); 10297 RCG(CGF); 10298 } 10299 } 10300 10301 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10302 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10303 const Expr *Device) { 10304 if (!CGF.HaveInsertPoint()) 10305 return; 10306 10307 assert((isa<OMPTargetEnterDataDirective>(D) || 10308 isa<OMPTargetExitDataDirective>(D) || 10309 isa<OMPTargetUpdateDirective>(D)) && 10310 "Expecting either target enter, exit data, or update directives."); 10311 10312 CodeGenFunction::OMPTargetDataInfo InputInfo; 10313 llvm::Value *MapTypesArray = nullptr; 10314 // Generate the code for the opening of the data environment. 10315 auto &&ThenGen = [this, &D, Device, &InputInfo, 10316 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10317 // Emit device ID if any. 10318 llvm::Value *DeviceID = nullptr; 10319 if (Device) { 10320 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10321 CGF.Int64Ty, /*isSigned=*/true); 10322 } else { 10323 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10324 } 10325 10326 // Emit the number of elements in the offloading arrays. 
10327 llvm::Constant *PointerNum = 10328 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10329 10330 llvm::Value *OffloadingArgs[] = {DeviceID, 10331 PointerNum, 10332 InputInfo.BasePointersArray.getPointer(), 10333 InputInfo.PointersArray.getPointer(), 10334 InputInfo.SizesArray.getPointer(), 10335 MapTypesArray, 10336 InputInfo.MappersArray.getPointer()}; 10337 10338 // Select the right runtime function call for each standalone 10339 // directive. 10340 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10341 RuntimeFunction RTLFn; 10342 switch (D.getDirectiveKind()) { 10343 case OMPD_target_enter_data: 10344 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10345 : OMPRTL___tgt_target_data_begin_mapper; 10346 break; 10347 case OMPD_target_exit_data: 10348 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10349 : OMPRTL___tgt_target_data_end_mapper; 10350 break; 10351 case OMPD_target_update: 10352 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10353 : OMPRTL___tgt_target_data_update_mapper; 10354 break; 10355 case OMPD_parallel: 10356 case OMPD_for: 10357 case OMPD_parallel_for: 10358 case OMPD_parallel_master: 10359 case OMPD_parallel_sections: 10360 case OMPD_for_simd: 10361 case OMPD_parallel_for_simd: 10362 case OMPD_cancel: 10363 case OMPD_cancellation_point: 10364 case OMPD_ordered: 10365 case OMPD_threadprivate: 10366 case OMPD_allocate: 10367 case OMPD_task: 10368 case OMPD_simd: 10369 case OMPD_sections: 10370 case OMPD_section: 10371 case OMPD_single: 10372 case OMPD_master: 10373 case OMPD_critical: 10374 case OMPD_taskyield: 10375 case OMPD_barrier: 10376 case OMPD_taskwait: 10377 case OMPD_taskgroup: 10378 case OMPD_atomic: 10379 case OMPD_flush: 10380 case OMPD_depobj: 10381 case OMPD_scan: 10382 case OMPD_teams: 10383 case OMPD_target_data: 10384 case OMPD_distribute: 10385 case OMPD_distribute_simd: 10386 case OMPD_distribute_parallel_for: 10387 case OMPD_distribute_parallel_for_simd: 
10388 case OMPD_teams_distribute: 10389 case OMPD_teams_distribute_simd: 10390 case OMPD_teams_distribute_parallel_for: 10391 case OMPD_teams_distribute_parallel_for_simd: 10392 case OMPD_declare_simd: 10393 case OMPD_declare_variant: 10394 case OMPD_begin_declare_variant: 10395 case OMPD_end_declare_variant: 10396 case OMPD_declare_target: 10397 case OMPD_end_declare_target: 10398 case OMPD_declare_reduction: 10399 case OMPD_declare_mapper: 10400 case OMPD_taskloop: 10401 case OMPD_taskloop_simd: 10402 case OMPD_master_taskloop: 10403 case OMPD_master_taskloop_simd: 10404 case OMPD_parallel_master_taskloop: 10405 case OMPD_parallel_master_taskloop_simd: 10406 case OMPD_target: 10407 case OMPD_target_simd: 10408 case OMPD_target_teams_distribute: 10409 case OMPD_target_teams_distribute_simd: 10410 case OMPD_target_teams_distribute_parallel_for: 10411 case OMPD_target_teams_distribute_parallel_for_simd: 10412 case OMPD_target_teams: 10413 case OMPD_target_parallel: 10414 case OMPD_target_parallel_for: 10415 case OMPD_target_parallel_for_simd: 10416 case OMPD_requires: 10417 case OMPD_unknown: 10418 default: 10419 llvm_unreachable("Unexpected standalone target data directive."); 10420 break; 10421 } 10422 CGF.EmitRuntimeCall( 10423 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10424 OffloadingArgs); 10425 }; 10426 10427 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10428 CodeGenFunction &CGF, PrePostActionTy &) { 10429 // Fill up the arrays with all the mapped variables. 10430 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10431 10432 // Get map clause information. 10433 MappableExprsHandler MEHandler(D, CGF); 10434 MEHandler.generateAllInfo(CombinedInfo); 10435 10436 TargetDataInfo Info; 10437 // Fill up the arrays and create the arguments. 
10438 emitOffloadingArrays(CGF, CombinedInfo, Info); 10439 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10440 Info.PointersArray, Info.SizesArray, 10441 Info.MapTypesArray, Info.MappersArray, Info); 10442 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10443 InputInfo.BasePointersArray = 10444 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10445 InputInfo.PointersArray = 10446 Address(Info.PointersArray, CGM.getPointerAlign()); 10447 InputInfo.SizesArray = 10448 Address(Info.SizesArray, CGM.getPointerAlign()); 10449 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10450 MapTypesArray = Info.MapTypesArray; 10451 if (D.hasClausesOfKind<OMPDependClause>()) 10452 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10453 else 10454 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10455 }; 10456 10457 if (IfCond) { 10458 emitIfClause(CGF, IfCond, TargetThenGen, 10459 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10460 } else { 10461 RegionCodeGenTy ThenRCG(TargetThenGen); 10462 ThenRCG(CGF); 10463 } 10464 } 10465 10466 namespace { 10467 /// Kind of parameter in a function with 'declare simd' directive. 10468 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10469 /// Attribute set of the parameter. 10470 struct ParamAttrTy { 10471 ParamKindTy Kind = Vector; 10472 llvm::APSInt StrideOrArg; 10473 llvm::APSInt Alignment; 10474 }; 10475 } // namespace 10476 10477 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10478 ArrayRef<ParamAttrTy> ParamAttrs) { 10479 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10480 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10481 // of that clause. The VLEN value must be power of 2. 10482 // In other case the notion of the function`s "characteristic data type" (CDT) 10483 // is used to compute the vector length. 
10484 // CDT is defined in the following order: 10485 // a) For non-void function, the CDT is the return type. 10486 // b) If the function has any non-uniform, non-linear parameters, then the 10487 // CDT is the type of the first such parameter. 10488 // c) If the CDT determined by a) or b) above is struct, union, or class 10489 // type which is pass-by-value (except for the type that maps to the 10490 // built-in complex data type), the characteristic data type is int. 10491 // d) If none of the above three cases is applicable, the CDT is int. 10492 // The VLEN is then determined based on the CDT and the size of vector 10493 // register of that ISA for which current vector version is generated. The 10494 // VLEN is computed using the formula below: 10495 // VLEN = sizeof(vector_register) / sizeof(CDT), 10496 // where vector register size specified in section 3.2.1 Registers and the 10497 // Stack Frame of original AMD64 ABI document. 10498 QualType RetType = FD->getReturnType(); 10499 if (RetType.isNull()) 10500 return 0; 10501 ASTContext &C = FD->getASTContext(); 10502 QualType CDT; 10503 if (!RetType.isNull() && !RetType->isVoidType()) { 10504 CDT = RetType; 10505 } else { 10506 unsigned Offset = 0; 10507 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10508 if (ParamAttrs[Offset].Kind == Vector) 10509 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10510 ++Offset; 10511 } 10512 if (CDT.isNull()) { 10513 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10514 if (ParamAttrs[I + Offset].Kind == Vector) { 10515 CDT = FD->getParamDecl(I)->getType(); 10516 break; 10517 } 10518 } 10519 } 10520 } 10521 if (CDT.isNull()) 10522 CDT = C.IntTy; 10523 CDT = CDT->getCanonicalTypeUnqualified(); 10524 if (CDT->isRecordType() || CDT->isUnionType()) 10525 CDT = C.IntTy; 10526 return C.getTypeSize(CDT); 10527 } 10528 10529 static void 10530 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10531 const llvm::APSInt &VLENVal, 10532 
ArrayRef<ParamAttrTy> ParamAttrs, 10533 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10534 struct ISADataTy { 10535 char ISA; 10536 unsigned VecRegSize; 10537 }; 10538 ISADataTy ISAData[] = { 10539 { 10540 'b', 128 10541 }, // SSE 10542 { 10543 'c', 256 10544 }, // AVX 10545 { 10546 'd', 256 10547 }, // AVX2 10548 { 10549 'e', 512 10550 }, // AVX512 10551 }; 10552 llvm::SmallVector<char, 2> Masked; 10553 switch (State) { 10554 case OMPDeclareSimdDeclAttr::BS_Undefined: 10555 Masked.push_back('N'); 10556 Masked.push_back('M'); 10557 break; 10558 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10559 Masked.push_back('N'); 10560 break; 10561 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10562 Masked.push_back('M'); 10563 break; 10564 } 10565 for (char Mask : Masked) { 10566 for (const ISADataTy &Data : ISAData) { 10567 SmallString<256> Buffer; 10568 llvm::raw_svector_ostream Out(Buffer); 10569 Out << "_ZGV" << Data.ISA << Mask; 10570 if (!VLENVal) { 10571 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10572 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10573 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10574 } else { 10575 Out << VLENVal; 10576 } 10577 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10578 switch (ParamAttr.Kind){ 10579 case LinearWithVarStride: 10580 Out << 's' << ParamAttr.StrideOrArg; 10581 break; 10582 case Linear: 10583 Out << 'l'; 10584 if (ParamAttr.StrideOrArg != 1) 10585 Out << ParamAttr.StrideOrArg; 10586 break; 10587 case Uniform: 10588 Out << 'u'; 10589 break; 10590 case Vector: 10591 Out << 'v'; 10592 break; 10593 } 10594 if (!!ParamAttr.Alignment) 10595 Out << 'a' << ParamAttr.Alignment; 10596 } 10597 Out << '_' << Fn->getName(); 10598 Fn->addFnAttr(Out.str()); 10599 } 10600 } 10601 } 10602 10603 // This are the Functions that are needed to mangle the name of the 10604 // vector functions generated by the compiler, according to the rules 10605 // defined in the "Vector Function ABI specifications for AArch64", 
10606 // available at 10607 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10608 10609 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10610 /// 10611 /// TODO: Need to implement the behavior for reference marked with a 10612 /// var or no linear modifiers (1.b in the section). For this, we 10613 /// need to extend ParamKindTy to support the linear modifiers. 10614 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10615 QT = QT.getCanonicalType(); 10616 10617 if (QT->isVoidType()) 10618 return false; 10619 10620 if (Kind == ParamKindTy::Uniform) 10621 return false; 10622 10623 if (Kind == ParamKindTy::Linear) 10624 return false; 10625 10626 // TODO: Handle linear references with modifiers 10627 10628 if (Kind == ParamKindTy::LinearWithVarStride) 10629 return false; 10630 10631 return true; 10632 } 10633 10634 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10635 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10636 QT = QT.getCanonicalType(); 10637 unsigned Size = C.getTypeSize(QT); 10638 10639 // Only scalars and complex within 16 bytes wide set PVB to true. 10640 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10641 return false; 10642 10643 if (QT->isFloatingType()) 10644 return true; 10645 10646 if (QT->isIntegerType()) 10647 return true; 10648 10649 if (QT->isPointerType()) 10650 return true; 10651 10652 // TODO: Add support for complex types (section 3.1.2, item 2). 10653 10654 return false; 10655 } 10656 10657 /// Computes the lane size (LS) of a return type or of an input parameter, 10658 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10659 /// TODO: Add support for references, section 3.2.1, item 1. 
10660 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10661 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10662 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10663 if (getAArch64PBV(PTy, C)) 10664 return C.getTypeSize(PTy); 10665 } 10666 if (getAArch64PBV(QT, C)) 10667 return C.getTypeSize(QT); 10668 10669 return C.getTypeSize(C.getUIntPtrType()); 10670 } 10671 10672 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10673 // signature of the scalar function, as defined in 3.2.2 of the 10674 // AAVFABI. 10675 static std::tuple<unsigned, unsigned, bool> 10676 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10677 QualType RetType = FD->getReturnType().getCanonicalType(); 10678 10679 ASTContext &C = FD->getASTContext(); 10680 10681 bool OutputBecomesInput = false; 10682 10683 llvm::SmallVector<unsigned, 8> Sizes; 10684 if (!RetType->isVoidType()) { 10685 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10686 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10687 OutputBecomesInput = true; 10688 } 10689 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10690 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10691 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10692 } 10693 10694 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10695 // The LS of a function parameter / return value can only be a power 10696 // of 2, starting from 8 bits, up to 128. 
10697 assert(std::all_of(Sizes.begin(), Sizes.end(), 10698 [](unsigned Size) { 10699 return Size == 8 || Size == 16 || Size == 32 || 10700 Size == 64 || Size == 128; 10701 }) && 10702 "Invalid size"); 10703 10704 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10705 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10706 OutputBecomesInput); 10707 } 10708 10709 /// Mangle the parameter part of the vector function name according to 10710 /// their OpenMP classification. The mangling function is defined in 10711 /// section 3.5 of the AAVFABI. 10712 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10713 SmallString<256> Buffer; 10714 llvm::raw_svector_ostream Out(Buffer); 10715 for (const auto &ParamAttr : ParamAttrs) { 10716 switch (ParamAttr.Kind) { 10717 case LinearWithVarStride: 10718 Out << "ls" << ParamAttr.StrideOrArg; 10719 break; 10720 case Linear: 10721 Out << 'l'; 10722 // Don't print the step value if it is not present or if it is 10723 // equal to 1. 10724 if (ParamAttr.StrideOrArg != 1) 10725 Out << ParamAttr.StrideOrArg; 10726 break; 10727 case Uniform: 10728 Out << 'u'; 10729 break; 10730 case Vector: 10731 Out << 'v'; 10732 break; 10733 } 10734 10735 if (!!ParamAttr.Alignment) 10736 Out << 'a' << ParamAttr.Alignment; 10737 } 10738 10739 return std::string(Out.str()); 10740 } 10741 10742 // Function used to add the attribute. The parameter `VLEN` is 10743 // templated to allow the use of "x" when targeting scalable functions 10744 // for SVE. 
10745 template <typename T> 10746 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10747 char ISA, StringRef ParSeq, 10748 StringRef MangledName, bool OutputBecomesInput, 10749 llvm::Function *Fn) { 10750 SmallString<256> Buffer; 10751 llvm::raw_svector_ostream Out(Buffer); 10752 Out << Prefix << ISA << LMask << VLEN; 10753 if (OutputBecomesInput) 10754 Out << "v"; 10755 Out << ParSeq << "_" << MangledName; 10756 Fn->addFnAttr(Out.str()); 10757 } 10758 10759 // Helper function to generate the Advanced SIMD names depending on 10760 // the value of the NDS when simdlen is not present. 10761 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10762 StringRef Prefix, char ISA, 10763 StringRef ParSeq, StringRef MangledName, 10764 bool OutputBecomesInput, 10765 llvm::Function *Fn) { 10766 switch (NDS) { 10767 case 8: 10768 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10769 OutputBecomesInput, Fn); 10770 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10771 OutputBecomesInput, Fn); 10772 break; 10773 case 16: 10774 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10775 OutputBecomesInput, Fn); 10776 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10777 OutputBecomesInput, Fn); 10778 break; 10779 case 32: 10780 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10781 OutputBecomesInput, Fn); 10782 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10783 OutputBecomesInput, Fn); 10784 break; 10785 case 64: 10786 case 128: 10787 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10788 OutputBecomesInput, Fn); 10789 break; 10790 default: 10791 llvm_unreachable("Scalar type is too wide."); 10792 } 10793 } 10794 10795 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10796 static void emitAArch64DeclareSimdFunction( 10797 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10798 ArrayRef<ParamAttrTy> ParamAttrs, 10799 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10800 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10801 10802 // Get basic data for building the vector signature. 10803 const auto Data = getNDSWDS(FD, ParamAttrs); 10804 const unsigned NDS = std::get<0>(Data); 10805 const unsigned WDS = std::get<1>(Data); 10806 const bool OutputBecomesInput = std::get<2>(Data); 10807 10808 // Check the values provided via `simdlen` by the user. 10809 // 1. A `simdlen(1)` doesn't produce vector signatures, 10810 if (UserVLEN == 1) { 10811 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10812 DiagnosticsEngine::Warning, 10813 "The clause simdlen(1) has no effect when targeting aarch64."); 10814 CGM.getDiags().Report(SLoc, DiagID); 10815 return; 10816 } 10817 10818 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10819 // Advanced SIMD output. 10820 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10821 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10822 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10823 "power of 2 when targeting Advanced SIMD."); 10824 CGM.getDiags().Report(SLoc, DiagID); 10825 return; 10826 } 10827 10828 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10829 // limits. 10830 if (ISA == 's' && UserVLEN != 0) { 10831 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10832 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10833 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10834 "lanes in the architectural constraints " 10835 "for SVE (min is 128-bit, max is " 10836 "2048-bit, by steps of 128-bit)"); 10837 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10838 return; 10839 } 10840 } 10841 10842 // Sort out parameter sequence. 
10843 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10844 StringRef Prefix = "_ZGV"; 10845 // Generate simdlen from user input (if any). 10846 if (UserVLEN) { 10847 if (ISA == 's') { 10848 // SVE generates only a masked function. 10849 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10850 OutputBecomesInput, Fn); 10851 } else { 10852 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10853 // Advanced SIMD generates one or two functions, depending on 10854 // the `[not]inbranch` clause. 10855 switch (State) { 10856 case OMPDeclareSimdDeclAttr::BS_Undefined: 10857 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10858 OutputBecomesInput, Fn); 10859 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10860 OutputBecomesInput, Fn); 10861 break; 10862 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10863 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10864 OutputBecomesInput, Fn); 10865 break; 10866 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10867 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10868 OutputBecomesInput, Fn); 10869 break; 10870 } 10871 } 10872 } else { 10873 // If no user simdlen is provided, follow the AAVFABI rules for 10874 // generating the vector length. 10875 if (ISA == 's') { 10876 // SVE, section 3.4.1, item 1. 10877 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10878 OutputBecomesInput, Fn); 10879 } else { 10880 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10881 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10882 // two vector names depending on the use of the clause 10883 // `[not]inbranch`. 
10884 switch (State) { 10885 case OMPDeclareSimdDeclAttr::BS_Undefined: 10886 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10887 OutputBecomesInput, Fn); 10888 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10889 OutputBecomesInput, Fn); 10890 break; 10891 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10892 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10893 OutputBecomesInput, Fn); 10894 break; 10895 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10896 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10897 OutputBecomesInput, Fn); 10898 break; 10899 } 10900 } 10901 } 10902 } 10903 10904 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10905 llvm::Function *Fn) { 10906 ASTContext &C = CGM.getContext(); 10907 FD = FD->getMostRecentDecl(); 10908 // Map params to their positions in function decl. 10909 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10910 if (isa<CXXMethodDecl>(FD)) 10911 ParamPositions.try_emplace(FD, 0); 10912 unsigned ParamPos = ParamPositions.size(); 10913 for (const ParmVarDecl *P : FD->parameters()) { 10914 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10915 ++ParamPos; 10916 } 10917 while (FD) { 10918 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10919 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10920 // Mark uniform parameters. 10921 for (const Expr *E : Attr->uniforms()) { 10922 E = E->IgnoreParenImpCasts(); 10923 unsigned Pos; 10924 if (isa<CXXThisExpr>(E)) { 10925 Pos = ParamPositions[FD]; 10926 } else { 10927 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10928 ->getCanonicalDecl(); 10929 Pos = ParamPositions[PVD]; 10930 } 10931 ParamAttrs[Pos].Kind = Uniform; 10932 } 10933 // Get alignment info. 
10934 auto NI = Attr->alignments_begin(); 10935 for (const Expr *E : Attr->aligneds()) { 10936 E = E->IgnoreParenImpCasts(); 10937 unsigned Pos; 10938 QualType ParmTy; 10939 if (isa<CXXThisExpr>(E)) { 10940 Pos = ParamPositions[FD]; 10941 ParmTy = E->getType(); 10942 } else { 10943 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10944 ->getCanonicalDecl(); 10945 Pos = ParamPositions[PVD]; 10946 ParmTy = PVD->getType(); 10947 } 10948 ParamAttrs[Pos].Alignment = 10949 (*NI) 10950 ? (*NI)->EvaluateKnownConstInt(C) 10951 : llvm::APSInt::getUnsigned( 10952 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10953 .getQuantity()); 10954 ++NI; 10955 } 10956 // Mark linear parameters. 10957 auto SI = Attr->steps_begin(); 10958 auto MI = Attr->modifiers_begin(); 10959 for (const Expr *E : Attr->linears()) { 10960 E = E->IgnoreParenImpCasts(); 10961 unsigned Pos; 10962 // Rescaling factor needed to compute the linear parameter 10963 // value in the mangled name. 10964 unsigned PtrRescalingFactor = 1; 10965 if (isa<CXXThisExpr>(E)) { 10966 Pos = ParamPositions[FD]; 10967 } else { 10968 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10969 ->getCanonicalDecl(); 10970 Pos = ParamPositions[PVD]; 10971 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10972 PtrRescalingFactor = CGM.getContext() 10973 .getTypeSizeInChars(P->getPointeeType()) 10974 .getQuantity(); 10975 } 10976 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10977 ParamAttr.Kind = Linear; 10978 // Assuming a stride of 1, for `linear` without modifiers. 
10979 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10980 if (*SI) { 10981 Expr::EvalResult Result; 10982 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10983 if (const auto *DRE = 10984 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10985 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10986 ParamAttr.Kind = LinearWithVarStride; 10987 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10988 ParamPositions[StridePVD->getCanonicalDecl()]); 10989 } 10990 } 10991 } else { 10992 ParamAttr.StrideOrArg = Result.Val.getInt(); 10993 } 10994 } 10995 // If we are using a linear clause on a pointer, we need to 10996 // rescale the value of linear_step with the byte size of the 10997 // pointee type. 10998 if (Linear == ParamAttr.Kind) 10999 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11000 ++SI; 11001 ++MI; 11002 } 11003 llvm::APSInt VLENVal; 11004 SourceLocation ExprLoc; 11005 const Expr *VLENExpr = Attr->getSimdlen(); 11006 if (VLENExpr) { 11007 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11008 ExprLoc = VLENExpr->getExprLoc(); 11009 } 11010 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11011 if (CGM.getTriple().isX86()) { 11012 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11013 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11014 unsigned VLEN = VLENVal.getExtValue(); 11015 StringRef MangledName = Fn->getName(); 11016 if (CGM.getTarget().hasFeature("sve")) 11017 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11018 MangledName, 's', 128, Fn, ExprLoc); 11019 if (CGM.getTarget().hasFeature("neon")) 11020 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11021 MangledName, 'n', 128, Fn, ExprLoc); 11022 } 11023 } 11024 FD = FD->getPreviousDecl(); 11025 } 11026 } 11027 11028 namespace { 11029 /// Cleanup action for doacross support. 
/// Cleanup that emits a call to the stored runtime function with the
/// DoacrossFinArgs arguments captured at construction time. It is pushed by
/// CGOpenMPRuntime::emitDoacrossInit with the __kmpc_doacross_fini callee.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments forwarded to the finalization call.
  static const int DoacrossFinArgs = 2;

private:
  /// Runtime function to call when the cleanup is emitted.
  llvm::FunctionCallee RTLFn;
  /// Copy of the call arguments; the ArrayRef given to the constructor is not
  /// retained.
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \param RTLFn Runtime function to call on cleanup.
  /// \param CallArgs Exactly DoacrossFinArgs arguments (asserted).
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emits the runtime call, unless there is no insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the call to __kmpc_doacross_init for the loop directive \p D and
/// pushes a cleanup that calls __kmpc_doacross_fini.
///
/// A temporary array with one 'kmp_dim' element per entry of
/// \p NumIterations is created and zero-initialized; for every element the
/// 'upper' field is set to the iteration count (converted to a signed 64-bit
/// integer) and the 'stride' field to 1. The 'lower' field is not written
/// after the zero-initialization.
///
/// \param CGF Function being generated; nothing is emitted if it has no
/// insertion point.
/// \param D Loop directive; its begin/end locations are used for the init and
/// fini calls respectively.
/// \param NumIterations Iteration-count expressions, one per dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices matching the order in which the fields were added above.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The matching __kmpc_doacross_fini(loc, gtid) call is emitted through a
  // cleanup registered for both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emits the runtime call for a doacross 'depend' clause \p C.
///
/// The loop data of every loop of the clause is evaluated, converted to a
/// signed 64-bit integer and stored into a temporary array, which is passed
/// to __kmpc_doacross_post for 'depend(source)' and to __kmpc_doacross_wait
/// for 'depend(sink)'. Any other dependency kind is rejected by an assertion.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // cnt[I] = (kmp_int64)<loop data of loop I>;
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: location, thread id, pointer to the first array element.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee with \p Args under a default artificial debug
/// location created from \p Loc.
///
/// If the callee is an llvm::Function that does not throw, a nounwind
/// runtime call is emitted; otherwise a regular runtime call is emitted.
/// \p Loc must be valid (asserted).
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emits a call to the outlined function \p OutlinedFn; this implementation
/// simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Records in HasEmittedDeclareTargetRegion that a declare target function
/// is being emitted. Declarations that are not FunctionDecls, or that are not
/// declare target declarations, leave the flag untouched.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Returns the address of the local variable for \p NativeParam.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
///
/// Emits a call to the stored runtime function with the CleanupArgs arguments
/// captured at construction time. It is pushed by
/// CGOpenMPRuntime::getAddressOfLocalVariable with the __kmpc_free callee.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments forwarded to the cleanup call.
  static const int CleanupArgs = 3;

private:
  /// Runtime function to call when the cleanup is emitted.
  llvm::FunctionCallee RTLFn;
  /// Copy of the call arguments; the ArrayRef given to the constructor is not
  /// retained.
  llvm::Value *Args[CleanupArgs];

public:
  /// \param RTLFn Runtime function to call on cleanup.
  /// \param CallArgs Exactly CleanupArgs arguments (asserted).
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emits the runtime call, unless there is no insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the allocation of a local variable that carries an
/// OMPAllocateDeclAttr requesting a non-default allocator.
///
/// The storage is obtained with a call to __kmpc_alloc(gtid, size, allocator)
/// and a cleanup calling __kmpc_free(gtid, ptr, allocator) is pushed for both
/// normal and EH exits.
///
/// \returns The address of the allocated storage cast to a pointer to the
/// variable's type, or Address::invalid() when \p VD is null, has no
/// OMPAllocateDeclAttr, or uses the default/null allocator type without an
/// allocator expression.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // The size is only known at run time, so the rounding is emitted as IR.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // The size is a compile-time constant; round it up to the alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Release the storage with the same thread id and allocator on scope exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}

/// Pushes a new set onto the runtime's NontemporalDeclsStack holding the
/// declarations referenced by the 'nontemporal' clauses of \p S.
///
/// Nothing is pushed when \p S has no 'nontemporal' clause. Each private
/// reference must be either a DeclRefExpr or a MemberExpr on the current
/// object.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

/// Pops the set pushed by the constructor, if one was pushed.
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// Returns true if \p VD is contained in any set currently on the
/// NontemporalDeclsStack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collects the declarations for which the lastprivate conditional analysis
/// must be disabled inside the directive \p S.
///
/// Candidates are the variables captured by the first capture region of a
/// target or tasking directive, plus the variables named by scalar
/// DeclRefExpr items of 'private', 'firstprivate', 'lastprivate',
/// 'reduction' and 'linear' clauses. A candidate is added to
/// \p NeedToAddForLPCsAsDisabled when the innermost entry of
/// LastprivateConditionalStack that mentions it is not already disabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  // Each loop below skips non-scalar items and items that are not plain
  // DeclRefExprs after stripping parentheses and implicit casts.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Only the innermost stack entry that mentions the declaration decides
  // (hence the 'break'): the declaration is reported only if that entry is
  // still enabled.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Pushes a LastprivateConditionalStack entry for the
/// 'lastprivate(conditional:)' variables of \p S.
///
/// An entry is pushed only when the OpenMP version is at least 50 and \p S
/// has at least one lastprivate clause of kind
/// OMPC_LASTPRIVATE_conditional. Every variable of such clauses is mapped to
/// a unique name generated with the "pl_cond" prefix, and \p IVLVal and the
/// current function are recorded in the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Pushes a disabled LastprivateConditionalStack entry for the declarations
/// reported by tryToDisableInnerAnalysis for \p S.
///
/// Nothing is pushed when the OpenMP version is below 50 or when no
/// declaration needs to be disabled. The pushed entry maps each declaration
/// to an empty name and is marked Disabled.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Creates an RAII object using the two-argument constructor, i.e. one that
/// disables the lastprivate conditional analysis for \p S where required.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

/// Pops the stack entry pushed by the constructor, if any. In asserts builds
/// the Disabled flag of the top entry is checked against the action taken.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Creates (or reuses) the private storage for the lastprivate conditional
/// variable \p VD in the current function and resets its 'Fired' flag.
///
/// The storage is an implicit record with two fields: the value (of the
/// variable's non-reference type) and a 'char' flag. The record type, both
/// fields and the base lvalue are cached per function and per variable in
/// LastprivateConditionalToTypes.
///
/// \returns The address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First request for this variable in this function: build the record and
    // its temporary, then cache them.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// Visit() returns true when a reference to a declaration registered in an
/// enabled stack entry is found; the details of the match are then available
/// through getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of lastprivate conditional entries; searched innermost first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Matched expression, or null if nothing was matched.
  const Expr *FoundE = nullptr;
  /// Canonical declaration of the matched variable or member.
  const Decl *FoundD = nullptr;
  /// Unique name registered for the matched declaration.
  StringRef UniqueDeclName;
  /// Iteration variable lvalue of the matching stack entry.
  LValue IVLVal;
  /// Function recorded in the matching stack entry.
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc is not read or written anywhere in this class.
  SourceLocation Loc;

public:
  /// Matches a direct reference to a registered declaration. The innermost
  /// stack entry that mentions the declaration decides; a disabled entry
  /// yields false.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same as VisitDeclRefExpr, but for members whose base is a (wrapped)
  /// 'this' expression; other member expressions yield false.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic traversal: visits the non-null children, skipping expression
  /// children that are not glvalues, and stops at the first match.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (matched expression, canonical declaration, unique name,
  /// iteration variable lvalue, owning function) of the last match.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits the update of the global copy of a lastprivate conditional variable.
///
/// Two internal variables are used: one named from \p UniqueDeclName holding
/// the last value, and one named from \p UniqueDeclName and "iv" holding the
/// iteration at which it was written. The value at \p LVal is copied to the
/// global copy when the stored iteration is less than or equal to the current
/// value of \p IVLVal. The update is emitted inside a critical region named
/// \p UniqueDeclName, except in OpenMPSimd mode where it is emitted inline.
/// Only scalar and complex values are supported.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned here is discarded immediately, and
    // the branch above has already been emitted - confirm this has the
    // intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// Checks whether \p LHS references a lastprivate conditional variable and,
/// if so, emits the code that records the update.
///
/// Nothing is emitted for OpenMP versions below 50, when the stack is empty,
/// or when no enabled entry matches. If the match belongs to a different
/// function than the one being generated, only the 'Fired' flag of the
/// variable's storage is set; otherwise the global copy is updated through
/// emitLastprivateConditionalUpdate.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the address of the private variable as the address of the
    // record registered for it in the outer function.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // The flag is written with a volatile, unordered atomic store.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// Emits, after the directive \p D, the conditional updates of the
/// lastprivate conditional variables that are captured by \p D.
///
/// The innermost enabled stack entry is used, and only if it belongs to the
/// current function. For each of its variables captured by the last capture
/// region of \p D and not listed in \p IgnoredDecls, the 'Fired' flag is
/// tested and, when it is non-zero, the global copy is updated through
/// emitLastprivateConditionalUpdate.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    // For reference variables the update reads through the reference.
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

/// Emits the copy of the global value of the lastprivate conditional
/// variable \p VD into \p PrivLVal.
///
/// \p VD must be registered in the top entry of LastprivateConditionalStack
/// (asserted). Nothing is emitted for OpenMP versions below 50 or when the
/// module has no global with the variable's unique name.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

// The CGOpenMPSIMDRuntime member functions below, with the exception of
// emitReduction, consist solely of llvm_unreachable("Not supported in
// SIMD-only mode"): control must never reach them.

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Emits a reduction in SIMD-only mode by delegating to
/// CGOpenMPRuntime::emitReduction. Only simple reductions are expected
/// (asserted on \p Options.SimpleReduction).
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

11966 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11967 SourceLocation Loc, 11968 llvm::Value *ReductionsPtr, 11969 LValue SharedLVal) { 11970 llvm_unreachable("Not supported in SIMD-only mode"); 11971 } 11972 11973 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11974 SourceLocation Loc) { 11975 llvm_unreachable("Not supported in SIMD-only mode"); 11976 } 11977 11978 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11979 CodeGenFunction &CGF, SourceLocation Loc, 11980 OpenMPDirectiveKind CancelRegion) { 11981 llvm_unreachable("Not supported in SIMD-only mode"); 11982 } 11983 11984 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11985 SourceLocation Loc, const Expr *IfCond, 11986 OpenMPDirectiveKind CancelRegion) { 11987 llvm_unreachable("Not supported in SIMD-only mode"); 11988 } 11989 11990 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11991 const OMPExecutableDirective &D, StringRef ParentName, 11992 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11993 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11994 llvm_unreachable("Not supported in SIMD-only mode"); 11995 } 11996 11997 void CGOpenMPSIMDRuntime::emitTargetCall( 11998 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11999 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12000 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12001 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12002 const OMPLoopDirective &D)> 12003 SizeEmitter) { 12004 llvm_unreachable("Not supported in SIMD-only mode"); 12005 } 12006 12007 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12008 llvm_unreachable("Not supported in SIMD-only mode"); 12009 } 12010 12011 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12012 llvm_unreachable("Not supported in SIMD-only mode"); 12013 } 12014 12015 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12016 return false; 12017 } 12018 12019 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12020 const OMPExecutableDirective &D, 12021 SourceLocation Loc, 12022 llvm::Function *OutlinedFn, 12023 ArrayRef<llvm::Value *> CapturedVars) { 12024 llvm_unreachable("Not supported in SIMD-only mode"); 12025 } 12026 12027 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12028 const Expr *NumTeams, 12029 const Expr *ThreadLimit, 12030 SourceLocation Loc) { 12031 llvm_unreachable("Not supported in SIMD-only mode"); 12032 } 12033 12034 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12035 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12036 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12037 llvm_unreachable("Not supported in SIMD-only mode"); 12038 } 12039 12040 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12041 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12042 const Expr *Device) { 12043 llvm_unreachable("Not supported in SIMD-only mode"); 12044 } 12045 12046 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12047 const OMPLoopDirective &D, 12048 ArrayRef<Expr *> NumIterations) { 12049 llvm_unreachable("Not supported in SIMD-only mode"); 12050 } 12051 12052 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12053 const OMPDependClause *C) { 12054 llvm_unreachable("Not supported in SIMD-only mode"); 12055 } 12056 12057 const VarDecl * 12058 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12059 const VarDecl *NativeParam) const { 12060 llvm_unreachable("Not supported in SIMD-only mode"); 12061 } 12062 12063 Address 12064 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12065 const VarDecl *NativeParam, 12066 const VarDecl *TargetParam) const { 12067 llvm_unreachable("Not supported in SIMD-only mode"); 12068 } 12069