1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 
52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 
289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel) 421 : CGF(CGF) { 422 // Start emission for the construct. 423 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 424 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 427 CGF.LambdaThisCaptureField = nullptr; 428 BlockInfo = CGF.BlockInfo; 429 CGF.BlockInfo = nullptr; 430 } 431 432 ~InlinedOpenMPRegionRAII() { 433 // Restore original CapturedStmtInfo only if we're done with code emission. 
434 auto *OldCSI = 435 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 436 delete CGF.CapturedStmtInfo; 437 CGF.CapturedStmtInfo = OldCSI; 438 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 439 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 440 CGF.BlockInfo = BlockInfo; 441 } 442 }; 443 444 /// Values for bit flags used in the ident_t to describe the fields. 445 /// All enumeric elements are named and described in accordance with the code 446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 447 enum OpenMPLocationFlags : unsigned { 448 /// Use trampoline for internal microtask. 449 OMP_IDENT_IMD = 0x01, 450 /// Use c-style ident structure. 451 OMP_IDENT_KMPC = 0x02, 452 /// Atomic reduction option for kmpc_reduce. 453 OMP_ATOMIC_REDUCE = 0x10, 454 /// Explicit 'barrier' directive. 455 OMP_IDENT_BARRIER_EXPL = 0x20, 456 /// Implicit barrier in code. 457 OMP_IDENT_BARRIER_IMPL = 0x40, 458 /// Implicit barrier in 'for' directive. 459 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 460 /// Implicit barrier in 'sections' directive. 461 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 462 /// Implicit barrier in 'single' directive. 463 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 464 /// Call of __kmp_for_static_init for static loop. 465 OMP_IDENT_WORK_LOOP = 0x200, 466 /// Call of __kmp_for_static_init for sections. 467 OMP_IDENT_WORK_SECTIONS = 0x400, 468 /// Call of __kmp_for_static_init for distribute. 469 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 470 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 471 }; 472 473 namespace { 474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 475 /// Values for bit flags for marking which requires clauses have been used. 476 enum OpenMPOffloadingRequiresDirFlags : int64_t { 477 /// flag undefined. 478 OMP_REQ_UNDEFINED = 0x000, 479 /// no requires clause present. 480 OMP_REQ_NONE = 0x001, 481 /// reverse_offload clause. 
482 OMP_REQ_REVERSE_OFFLOAD = 0x002, 483 /// unified_address clause. 484 OMP_REQ_UNIFIED_ADDRESS = 0x004, 485 /// unified_shared_memory clause. 486 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 487 /// dynamic_allocators clause. 488 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 489 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 490 }; 491 492 enum OpenMPOffloadingReservedDeviceIDs { 493 /// Device ID if the device was not defined, runtime should get it 494 /// from environment variables in the spec. 495 OMP_DEVICEID_UNDEF = -1, 496 }; 497 } // anonymous namespace 498 499 /// Describes ident structure that describes a source location. 500 /// All descriptions are taken from 501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 502 /// Original structure: 503 /// typedef struct ident { 504 /// kmp_int32 reserved_1; /**< might be used in Fortran; 505 /// see above */ 506 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 507 /// KMP_IDENT_KMPC identifies this union 508 /// member */ 509 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 510 /// see above */ 511 ///#if USE_ITT_BUILD 512 /// /* but currently used for storing 513 /// region-specific ITT */ 514 /// /* contextual information. */ 515 ///#endif /* USE_ITT_BUILD */ 516 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 517 /// C++ */ 518 /// char const *psource; /**< String describing the source location. 519 /// The string is composed of semi-colon separated 520 // fields which describe the source file, 521 /// the function and a pair of line numbers that 522 /// delimit the construct. 523 /// */ 524 /// } ident_t; 525 enum IdentFieldIndex { 526 /// might be used in Fortran 527 IdentField_Reserved_1, 528 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
529 IdentField_Flags, 530 /// Not really used in Fortran any more 531 IdentField_Reserved_2, 532 /// Source[4] in Fortran, do not use for C++ 533 IdentField_Reserved_3, 534 /// String describing the source location. The string is composed of 535 /// semi-colon separated fields which describe the source file, the function 536 /// and a pair of line numbers that delimit the construct. 537 IdentField_PSource 538 }; 539 540 /// Schedule types for 'omp for' loops (these enumerators are taken from 541 /// the enum sched_type in kmp.h). 542 enum OpenMPSchedType { 543 /// Lower bound for default (unordered) versions. 544 OMP_sch_lower = 32, 545 OMP_sch_static_chunked = 33, 546 OMP_sch_static = 34, 547 OMP_sch_dynamic_chunked = 35, 548 OMP_sch_guided_chunked = 36, 549 OMP_sch_runtime = 37, 550 OMP_sch_auto = 38, 551 /// static with chunk adjustment (e.g., simd) 552 OMP_sch_static_balanced_chunked = 45, 553 /// Lower bound for 'ordered' versions. 554 OMP_ord_lower = 64, 555 OMP_ord_static_chunked = 65, 556 OMP_ord_static = 66, 557 OMP_ord_dynamic_chunked = 67, 558 OMP_ord_guided_chunked = 68, 559 OMP_ord_runtime = 69, 560 OMP_ord_auto = 70, 561 OMP_sch_default = OMP_sch_static, 562 /// dist_schedule types 563 OMP_dist_sch_static_chunked = 91, 564 OMP_dist_sch_static = 92, 565 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 566 /// Set if the monotonic schedule modifier was present. 567 OMP_sch_modifier_monotonic = (1 << 29), 568 /// Set if the nonmonotonic schedule modifier was present. 569 OMP_sch_modifier_nonmonotonic = (1 << 30), 570 }; 571 572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 573 /// region. 
574 class CleanupTy final : public EHScopeStack::Cleanup { 575 PrePostActionTy *Action; 576 577 public: 578 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 579 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 580 if (!CGF.HaveInsertPoint()) 581 return; 582 Action->Exit(CGF); 583 } 584 }; 585 586 } // anonymous namespace 587 588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 589 CodeGenFunction::RunCleanupsScope Scope(CGF); 590 if (PrePostAction) { 591 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 592 Callback(CodeGen, CGF, *PrePostAction); 593 } else { 594 PrePostActionTy Action; 595 Callback(CodeGen, CGF, Action); 596 } 597 } 598 599 /// Check if the combiner is a call to UDR combiner and if it is so return the 600 /// UDR decl used for reduction. 601 static const OMPDeclareReductionDecl * 602 getReductionInit(const Expr *ReductionOp) { 603 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 604 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 605 if (const auto *DRE = 606 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 607 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 608 return DRD; 609 return nullptr; 610 } 611 612 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 613 const OMPDeclareReductionDecl *DRD, 614 const Expr *InitOp, 615 Address Private, Address Original, 616 QualType Ty) { 617 if (DRD->getInitializer()) { 618 std::pair<llvm::Function *, llvm::Function *> Reduction = 619 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 620 const auto *CE = cast<CallExpr>(InitOp); 621 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 622 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 623 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 624 const auto *LHSDRE = 625 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 626 const auto *RHSDRE = 627 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 628 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 629 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 630 [=]() { return Private; }); 631 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 632 [=]() { return Original; }); 633 (void)PrivateScope.Privatize(); 634 RValue Func = RValue::get(Reduction.second); 635 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 636 CGF.EmitIgnoredExpr(InitOp); 637 } else { 638 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 639 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 640 auto *GV = new llvm::GlobalVariable( 641 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 642 llvm::GlobalValue::PrivateLinkage, Init, Name); 643 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 644 RValue InitRVal; 645 switch (CGF.getEvaluationKind(Ty)) { 646 case TEK_Scalar: 647 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 648 break; 649 case TEK_Complex: 650 InitRVal = 651 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 652 break; 653 case TEK_Aggregate: 654 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 655 break; 656 } 657 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 658 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 659 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 660 /*IsInitializer=*/false); 661 } 662 } 663 664 /// Emit initialization of arrays of complex types. 665 /// \param DestAddr Address of the array. 666 /// \param Type Type of array. 667 /// \param Init Initial expression of array. 668 /// \param SrcAddr Address of the original array. 669 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 670 QualType Type, bool EmitDeclareReductionInit, 671 const Expr *Init, 672 const OMPDeclareReductionDecl *DRD, 673 Address SrcAddr = Address::invalid()) { 674 // Perform element-by-element initialization. 
675 QualType ElementTy; 676 677 // Drill down to the base element type on both arrays. 678 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 679 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 680 DestAddr = 681 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 682 if (DRD) 683 SrcAddr = 684 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 685 686 llvm::Value *SrcBegin = nullptr; 687 if (DRD) 688 SrcBegin = SrcAddr.getPointer(); 689 llvm::Value *DestBegin = DestAddr.getPointer(); 690 // Cast from pointer to array type to pointer to single element. 691 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 692 // The basic structure here is a while-do loop. 693 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 694 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 695 llvm::Value *IsEmpty = 696 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 697 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 698 699 // Enter the loop body, making that address the current address. 
700 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 701 CGF.EmitBlock(BodyBB); 702 703 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 704 705 llvm::PHINode *SrcElementPHI = nullptr; 706 Address SrcElementCurrent = Address::invalid(); 707 if (DRD) { 708 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 709 "omp.arraycpy.srcElementPast"); 710 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 711 SrcElementCurrent = 712 Address(SrcElementPHI, 713 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 714 } 715 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 716 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 717 DestElementPHI->addIncoming(DestBegin, EntryBB); 718 Address DestElementCurrent = 719 Address(DestElementPHI, 720 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 721 722 // Emit copy. 723 { 724 CodeGenFunction::RunCleanupsScope InitScope(CGF); 725 if (EmitDeclareReductionInit) { 726 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 727 SrcElementCurrent, ElementTy); 728 } else 729 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 730 /*IsInitializer=*/false); 731 } 732 733 if (DRD) { 734 // Shift the address forward by one element. 735 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 736 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 737 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 738 } 739 740 // Shift the address forward by one element. 741 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 742 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 743 // Check whether we've reached the end. 744 llvm::Value *Done = 745 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 746 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 747 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 748 749 // Done. 
750 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 751 } 752 753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 754 return CGF.EmitOMPSharedLValue(E); 755 } 756 757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 758 const Expr *E) { 759 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 760 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 761 return LValue(); 762 } 763 764 void ReductionCodeGen::emitAggregateInitialization( 765 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 766 const OMPDeclareReductionDecl *DRD) { 767 // Emit VarDecl with copy init for arrays. 768 // Get the address of the original variable captured in current 769 // captured region. 770 const auto *PrivateVD = 771 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 772 bool EmitDeclareReductionInit = 773 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 774 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 775 EmitDeclareReductionInit, 776 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 777 : PrivateVD->getInit(), 778 DRD, SharedLVal.getAddress(CGF)); 779 } 780 781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 782 ArrayRef<const Expr *> Origs, 783 ArrayRef<const Expr *> Privates, 784 ArrayRef<const Expr *> ReductionOps) { 785 ClausesData.reserve(Shareds.size()); 786 SharedAddresses.reserve(Shareds.size()); 787 Sizes.reserve(Shareds.size()); 788 BaseDecls.reserve(Shareds.size()); 789 const auto *IOrig = Origs.begin(); 790 const auto *IPriv = Privates.begin(); 791 const auto *IRed = ReductionOps.begin(); 792 for (const Expr *Ref : Shareds) { 793 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 794 std::advance(IOrig, 1); 795 std::advance(IPriv, 1); 796 std::advance(IRed, 1); 797 } 798 } 799 800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 801 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 802 "Number of generated lvalues must be exactly N."); 803 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 804 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 805 SharedAddresses.emplace_back(First, Second); 806 if (ClausesData[N].Shared == ClausesData[N].Ref) { 807 OrigAddresses.emplace_back(First, Second); 808 } else { 809 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 810 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 811 OrigAddresses.emplace_back(First, Second); 812 } 813 } 814 815 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 816 const auto *PrivateVD = 817 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 818 QualType PrivateType = PrivateVD->getType(); 819 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 820 if (!PrivateType->isVariablyModifiedType()) { 821 Sizes.emplace_back( 822 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 823 nullptr); 824 return; 825 } 826 llvm::Value *Size; 827 llvm::Value 
*SizeInChars; 828 auto *ElemType = 829 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 830 ->getElementType(); 831 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 832 if (AsArraySection) { 833 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 834 OrigAddresses[N].first.getPointer(CGF)); 835 Size = CGF.Builder.CreateNUWAdd( 836 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 837 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 838 } else { 839 SizeInChars = 840 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 841 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 842 } 843 Sizes.emplace_back(SizeInChars, Size); 844 CodeGenFunction::OpaqueValueMapping OpaqueMap( 845 CGF, 846 cast<OpaqueValueExpr>( 847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 848 RValue::get(Size)); 849 CGF.EmitVariablyModifiedType(PrivateType); 850 } 851 852 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 853 llvm::Value *Size) { 854 const auto *PrivateVD = 855 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 856 QualType PrivateType = PrivateVD->getType(); 857 if (!PrivateType->isVariablyModifiedType()) { 858 assert(!Size && !Sizes[N].second && 859 "Size should be nullptr for non-variably modified reduction " 860 "items."); 861 return; 862 } 863 CodeGenFunction::OpaqueValueMapping OpaqueMap( 864 CGF, 865 cast<OpaqueValueExpr>( 866 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 867 RValue::get(Size)); 868 CGF.EmitVariablyModifiedType(PrivateType); 869 } 870 871 void ReductionCodeGen::emitInitialization( 872 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 873 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 874 assert(SharedAddresses.size() > N && "No variable was generated"); 875 const auto *PrivateVD = 876 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory types of the private variable and of
  // the recorded shared lvalue before initializing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  // Three mutually exclusive strategies, tried in this order:
  //  1. array-typed private copy: aggregate initialization;
  //  2. a declare-reduction declaration that has an initializer, or a private
  //     variable without an init expression: reduction initializer;
  //  3. DefaultInit returned false and the private variable has a non-trivial
  //     init expression: evaluate that expression into the private copy.
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item \p N has a type whose
/// destruction kind is not DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N located
/// at \p PrivateAddr, if needCleanups(N) reports that one is required.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
CGF.Builder.CreateElementBitCast( 918 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 919 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 920 } 921 } 922 923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 924 LValue BaseLV) { 925 BaseTy = BaseTy.getNonReferenceType(); 926 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 927 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 928 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 929 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 930 } else { 931 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 932 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 933 } 934 BaseTy = BaseTy->getPointeeType(); 935 } 936 return CGF.MakeAddrLValue( 937 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 938 CGF.ConvertTypeForMem(ElTy)), 939 BaseLV.getType(), BaseLV.getBaseInfo(), 940 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 941 } 942 943 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 944 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 945 llvm::Value *Addr) { 946 Address Tmp = Address::invalid(); 947 Address TopTmp = Address::invalid(); 948 Address MostTopTmp = Address::invalid(); 949 BaseTy = BaseTy.getNonReferenceType(); 950 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 951 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 952 Tmp = CGF.CreateMemTemp(BaseTy); 953 if (TopTmp.isValid()) 954 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 955 else 956 MostTopTmp = Tmp; 957 TopTmp = Tmp; 958 BaseTy = BaseTy->getPointeeType(); 959 } 960 llvm::Type *Ty = BaseLVType; 961 if (Tmp.isValid()) 962 Ty = Tmp.getElementType(); 963 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 964 if (Tmp.isValid()) { 965 CGF.Builder.CreateStore(Addr, Tmp); 966 return MostTopTmp; 967 } 968 return Address(Addr, BaseLVAlignment); 969 } 970 971 static const VarDecl 
*getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  // Find the variable an array section or array subscript expression is based
  // on. On success the base DeclRefExpr is stored in DE and its VarDecl is
  // returned. For any other kind of expression nullptr is returned and DE is
  // left untouched.
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    // Strip nested array sections first, then nested subscripts.
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Record the base declaration of reduction item \p N in BaseDecls and return
/// the address to use for its private copy.
///
/// When the item is an array section or array subscript, the distance between
/// the start of the base variable and the start of the shared item is applied
/// to \p PrivateAddr, and the result is wrapped by castToBase. Otherwise
/// \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned by getBaseDecl when it returns a non-null declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Adjustment = base begin - shared item begin.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1014 return PrivateAddr; 1015 } 1016 1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1018 const OMPDeclareReductionDecl *DRD = 1019 getReductionInit(ClausesData[N].ReductionOp); 1020 return DRD && DRD->getInitializer(); 1021 } 1022 1023 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1024 return CGF.EmitLoadOfPointerLValue( 1025 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1026 getThreadIDVariable()->getType()->castAs<PointerType>()); 1027 } 1028 1029 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1030 if (!CGF.HaveInsertPoint()) 1031 return; 1032 // 1.2.2 OpenMP Language Terminology 1033 // Structured block - An executable statement with a single entry at the 1034 // top and a single exit at the bottom. 1035 // The point of exit cannot be a branch out of the structured block. 1036 // longjmp() and throw() must not violate the entry/exit criteria. 
1037 CGF.EHStack.pushTerminate(); 1038 CodeGen(CGF); 1039 CGF.EHStack.popTerminate(); 1040 } 1041 1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1043 CodeGenFunction &CGF) { 1044 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1045 getThreadIDVariable()->getType(), 1046 AlignmentSource::Decl); 1047 } 1048 1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1050 QualType FieldTy) { 1051 auto *Field = FieldDecl::Create( 1052 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1053 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1054 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1055 Field->setAccess(AS_public); 1056 DC->addDecl(Field); 1057 return Field; 1058 } 1059 1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1061 StringRef Separator) 1062 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1063 OffloadEntriesInfoManager(CGM) { 1064 ASTContext &C = CGM.getContext(); 1065 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1066 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1067 RD->startDefinition(); 1068 // reserved_1 1069 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1070 // flags 1071 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1072 // reserved_2 1073 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1074 // reserved_3 1075 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1076 // psource 1077 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1078 RD->completeDefinition(); 1079 IdentQTy = C.getRecordType(RD); 1080 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1081 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1082 1083 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1084 llvm::omp::types::initializeTypes(CGM.getModule()); 1085 loadOffloadInfoMetadata(); 1086 } 1087 1088 void CGOpenMPRuntime::clear() { 1089 InternalVars.clear(); 1090 // Clean non-target 
// variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase globals that are mere declarations and have no uses left.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a single name: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the internal helper function for the combiner or the initializer of a
/// user-defined reduction over type \p Ty.
///
/// \param CombinerInitializer Expression emitted (and its value ignored) in
///        the body of the helper; may be null.
/// \param In Variable that is mapped to the pointee of the second parameter.
/// \param Out Variable that is mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the helper name ("omp_combiner" or
///        "omp_initializer"). For an initializer, a non-trivial init
///        expression of \p Out is additionally emitted into \p Out first.
/// \return The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The "out" parameter comes first, the "in" parameter second.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimization enabled, force the helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helpers first run the non-trivial init expression of Out.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Helpers for D were already emitted.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // The initializer expression is only passed on for the CallInit kind; for
    // other kinds the helper is emitted with a null expression.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  // Remember which function the declaration was emitted for, when one was
  // supplied.
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) function pair for \p D, emitting the
/// helpers first if they are not in UDRMap yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit the branch at IP, then restore the previous insertion point.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the finalization callback pushed by the constructor; does nothing
  // when no builder was supplied.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Shared implementation of emitParallelOutlinedFunction and
/// emitTeamsOutlinedFunction: generates the outlined function for the
/// captured statement \p CS of directive \p D.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Ask the concrete directive class whether the region contains a cancel
  // construct; directive kinds not listed here leave HasCancel false.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel =
OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  // Install CGInfo as the captured-statement info of CGF for the duration of
  // the function generation.
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Emit the outlined function for the 'parallel' captured region of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for the 'teams' captured region of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task) where the task pointer is loaded
  // from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture their body under OMPD_taskloop, all other
  // directives under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Directive kinds not listed below leave HasCancel false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // NumberOfParts is only written for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const
RecordDecl *RD, const CGRecordLayout &RL, 1354 ArrayRef<llvm::Constant *> Data) { 1355 llvm::StructType *StructTy = RL.getLLVMType(); 1356 unsigned PrevIdx = 0; 1357 ConstantInitBuilder CIBuilder(CGM); 1358 auto DI = Data.begin(); 1359 for (const FieldDecl *FD : RD->fields()) { 1360 unsigned Idx = RL.getLLVMFieldNo(FD); 1361 // Fill the alignment. 1362 for (unsigned I = PrevIdx; I < Idx; ++I) 1363 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1364 PrevIdx = Idx + 1; 1365 Fields.add(*DI); 1366 ++DI; 1367 } 1368 } 1369 1370 template <class... As> 1371 static llvm::GlobalVariable * 1372 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1373 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1374 As &&... Args) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantInitBuilder CIBuilder(CGM); 1378 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1379 buildStructValue(Fields, CGM, RD, RL, Data); 1380 return Fields.finishAndCreateGlobal( 1381 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1382 std::forward<As>(Args)...); 1383 } 1384 1385 template <typename T> 1386 static void 1387 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1388 ArrayRef<llvm::Constant *> Data, 1389 T &Parent) { 1390 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1391 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1392 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1393 buildStructValue(Fields, CGM, RD, RL, Data); 1394 Fields.finishAndAddTo(Parent); 1395 } 1396 1397 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1398 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1399 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1400 FlagsTy FlagsKey(Flags, Reserved2Flags); 1401 llvm::Value *Entry = 
OpenMPDefaultLocMap.lookup(FlagsKey);
  // Default locations are created once per (Flags, Reserved2Flags) pair and
  // cached in OpenMPDefaultLocMap.
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // One initializer per ident_t field: three 32-bit values around the two
    // flag words, then the psource string.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Create the "svcpt" marker instruction for the current function and record
/// it as the function's service insertion point.
///
/// The marker is a bitcast of an undef i32. With \p AtCurrentPoint it is
/// appended to the builder's current insertion block; otherwise it is placed
/// right after CGF.AllocaInsertPt. Must not be called while a service
/// insertion point is already set for the function.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the service insertion point marker of the current function, if one
/// is set.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem =
OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    // Reset the map entry before erasing the instruction.
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Return a pointer to an ident_t object describing \p Loc for use as the
/// location argument of a runtime call.
///
/// Without debug info, or for an invalid \p Loc, the shared default location
/// for \p Flags is returned. Otherwise a per-function ident_t temporary is
/// used: it is created on first use as a copy of the default location and its
/// psource field is overwritten with a ";file;function;line;column;;" string
/// built from \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the service insertion point (created on
    // demand) with a copy of the default location; the guard restores the
    // builder's insertion point afterwards.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is created once per raw source location encoding and
  // cached in OpenMPDebugLocMap.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function name is left empty when the current declaration is not a
    // FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable when no landing pad is required or
      // exceptions are disabled, when emitting into the entry block, or when
      // the variable's pointer is not an instruction or is an instruction
      // located in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1560 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1561 if (!Elem.second.ServiceInsertPt) 1562 setLocThreadIdInsertPt(CGF); 1563 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1564 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1565 llvm::CallInst *Call = CGF.Builder.CreateCall( 1566 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 1567 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1568 emitUpdateLocation(CGF, Loc)); 1569 Call->setCallingConv(CGF.getRuntimeCC()); 1570 Elem.second.ThreadID = Call; 1571 return Call; 1572 } 1573 1574 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1575 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1576 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1577 clearLocThreadIdInsertPt(CGF); 1578 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1579 } 1580 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1581 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1582 UDRMap.erase(D); 1583 FunctionUDRMap.erase(CGF.CurFn); 1584 } 1585 auto I = FunctionUDMMap.find(CGF.CurFn); 1586 if (I != FunctionUDMMap.end()) { 1587 for(const auto *D : I->second) 1588 UDMMap.erase(D); 1589 FunctionUDMMap.erase(I); 1590 } 1591 LastprivateConditionalToTypes.erase(CGF.CurFn); 1592 } 1593 1594 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1595 return IdentTy->getPointerTo(); 1596 } 1597 1598 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1599 if (!Kmpc_MicroTy) { 1600 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1601 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1602 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1603 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1604 } 1605 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1606 } 1607 1608 llvm::FunctionCallee 1609 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1610 assert((IVSize == 32 || IVSize == 64) && 1611 "IV size is not compatible with the omp runtime"); 1612 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1613 : "__kmpc_for_static_init_4u") 1614 : (IVSigned ? "__kmpc_for_static_init_8" 1615 : "__kmpc_for_static_init_8u"); 1616 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1617 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1618 llvm::Type *TypeParams[] = { 1619 getIdentTyPointerTy(), // loc 1620 CGM.Int32Ty, // tid 1621 CGM.Int32Ty, // schedtype 1622 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1623 PtrTy, // p_lower 1624 PtrTy, // p_upper 1625 PtrTy, // p_stride 1626 ITy, // incr 1627 ITy // chunk 1628 }; 1629 auto *FnTy = 1630 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1631 return CGM.CreateRuntimeFunction(FnTy, Name); 1632 } 1633 1634 llvm::FunctionCallee 1635 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1636 assert((IVSize == 32 || IVSize == 64) && 1637 "IV size is not compatible with the omp runtime"); 1638 StringRef Name = 1639 IVSize == 32 1640 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1641 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1642 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1643 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1644 CGM.Int32Ty, // tid 1645 CGM.Int32Ty, // schedtype 1646 ITy, // lower 1647 ITy, // upper 1648 ITy, // stride 1649 ITy // chunk 1650 }; 1651 auto *FnTy = 1652 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1653 return CGM.CreateRuntimeFunction(FnTy, Name); 1654 } 1655 1656 llvm::FunctionCallee 1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1658 assert((IVSize == 32 || IVSize == 64) && 1659 "IV size is not compatible with the omp runtime"); 1660 StringRef Name = 1661 IVSize == 32 1662 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1663 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1664 llvm::Type *TypeParams[] = { 1665 getIdentTyPointerTy(), // loc 1666 CGM.Int32Ty, // tid 1667 }; 1668 auto *FnTy = 1669 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1670 return CGM.CreateRuntimeFunction(FnTy, Name); 1671 } 1672 1673 llvm::FunctionCallee 1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1675 assert((IVSize == 32 || IVSize == 64) && 1676 "IV size is not compatible with the omp runtime"); 1677 StringRef Name = 1678 IVSize == 32 1679 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1680 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1681 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1682 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1683 llvm::Type *TypeParams[] = { 1684 getIdentTyPointerTy(), // loc 1685 CGM.Int32Ty, // tid 1686 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1687 PtrTy, // p_lower 1688 PtrTy, // p_upper 1689 PtrTy // p_stride 1690 }; 1691 auto *FnTy = 1692 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1693 return CGM.CreateRuntimeFunction(FnTy, Name); 1694 } 1695 1696 /// Obtain information that uniquely identifies a target entry. This 1697 /// consists of the file and device IDs as well as line number associated with 1698 /// the relevant entry source location. 1699 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1700 unsigned &DeviceID, unsigned &FileID, 1701 unsigned &LineNum) { 1702 SourceManager &SM = C.getSourceManager(); 1703 1704 // The loc should be always valid and have a file ID (the user cannot use 1705 // #pragma directives in macros) 1706 1707 assert(Loc.isValid() && "Source location is expected to be always valid."); 1708 1709 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1710 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1711 1712 llvm::sys::fs::UniqueID ID; 1713 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1714 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1715 << PLoc.getFilename() << EC.message(); 1716 1717 DeviceID = ID.getDevice(); 1718 FileID = ID.getFile(); 1719 LineNum = PLoc.getLine(); 1720 } 1721 1722 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1723 if (CGM.getLangOpts().OpenMPSimd) 1724 return Address::invalid(); 1725 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1726 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1727 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1728 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1729 HasRequiresUnifiedSharedMemory))) { 1730 SmallString<64> PtrName; 1731 { 1732 
llvm::raw_svector_ostream OS(PtrName);
      // Name of the reference pointer: <mangled name>[_<file id in hex>]
      // followed by "_decl_tgt_ref_ptr".
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Only the file ID is used here; DeviceID and Line are ignored.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // No global with that name in the module yet: create the pointer
      // variable, give it weak linkage and register it.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // When not compiling for the device, the pointer is initialized with
      // the address of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  // Not a declare target variable that is accessed through a reference
  // pointer.
  return Address::invalid();
}

/// Return the internal variable named <mangled name of \p VD> + the "cache"
/// suffix, creating it on first use. Must only be called when the
/// threadprivate variable is not implemented with TLS (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1766 std::string Suffix = getName({"cache", ""}); 1767 return getOrCreateInternalVariable( 1768 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1769 } 1770 1771 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1772 const VarDecl *VD, 1773 Address VDAddr, 1774 SourceLocation Loc) { 1775 if (CGM.getLangOpts().OpenMPUseTLS && 1776 CGM.getContext().getTargetInfo().isTLSSupported()) 1777 return VDAddr; 1778 1779 llvm::Type *VarTy = VDAddr.getElementType(); 1780 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1781 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1782 CGM.Int8PtrTy), 1783 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1784 getOrCreateThreadPrivateCache(VD)}; 1785 return Address(CGF.EmitRuntimeCall( 1786 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 1787 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1788 Args), 1789 VDAddr.getAlignment()); 1790 } 1791 1792 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1793 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1794 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1795 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1796 // library. 1797 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1798 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 1799 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1800 OMPLoc); 1801 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1802 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the constructor/destructor helpers for the threadprivate variable
/// \p VD located at \p VDAddr and register them with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is used, when \p VD
/// has no definition, when its mangled name was already handled, or when
/// neither a constructor nor a destructor helper is needed. If \p CGF is
/// null, the registration is emitted into a new "__omp_threadprivate_init_"
/// function which is returned; otherwise it is emitted into \p CGF and
/// nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() result guarantees each mangled name is processed only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // The helper has the shape: void *ctor(void *Dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and view it as an address with
      // the alignment of the original variable.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the destination storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the destination pointer it was given: reload the
      // argument and store it into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // The helper has the shape: void dtor(void *Dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // NL and AL are named locals, so they stay alive until the end of this
      // scope.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the object the argument points to, using the destroyer that
      // matches the variable's destruction kind.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of Ctor/Dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into was supplied: wrap the registration in its
      // own void() function and hand that function back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the supplied function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit and register the constructor/destructor offload entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// Returns false when there are no offload target triples and this is not a
/// device compilation; in every other case the return value is the
/// OpenMPIsDevice language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non-declare-target variables, for 'link' variables,
  // and for 'to' variables under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each mangled name is processed only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix: "__omp_offloading_" "_<device>" "_<file>_" <name>
    // "_l<line>", with device and file IDs in hex.
    // NOTE(review): the literal ends with '_' and the first format string
    // starts with '_', so the name contains two consecutive underscores
    // there; left untouched because other code may rely on the exact name.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the declare target variable VD (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL
= ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Run the initializer directly on the variable's global storage.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      // The entry ID is the constructor function itself, cast to i8*.
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: no function is generated; a private
      // constant i8 zero global with the same "_ctor" name serves as both
      // the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the storage of the
      // declare target variable VD (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destroy the variable's global storage.
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      // The entry ID is the destructor function itself, cast to i8*.
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: a private constant i8 zero global with the
      // same "_dtor" name serves as both the entry address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated threadprivate variable of type
/// \p VarType whose global is named \p Name + the "artificial" suffix.
///
/// With OpenMP TLS enabled and supported by the target, the global itself is
/// marked thread-local and returned. Otherwise the address is obtained by a
/// call to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Arguments: loc, gtid, variable address, size of the type, cache variable.
  // No source location is available here, so a default one is used.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime call result is cast to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' clause: \p ThenGen is emitted for a true
/// \p Cond and \p ElseGen for a false one. When \p Cond constant-folds, only
/// the selected generator is run and no branch is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty() is a discarded
  // temporary that is destroyed at the end of this statement, unlike the
  // named 'auto NL = ...' locals used elsewhere in this file. If
  // ApplyDebugLocation restores the previous location in its destructor,
  // this statement has no lasting effect - confirm.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-temporary pattern as above - confirm.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the call that runs the outlined parallel region \p OutlinedFn with
/// the captured variables \p CapturedVars.
///
/// Without an 'if' clause (\p IfCond is null) only the __kmpc_fork_call path
/// is emitted. With one, emitIfClause() selects between the fork call and a
/// serialized execution of \p OutlinedFn.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The three fixed arguments are followed by the captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Temporary holding the constant 0 that is passed as the second argument.
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// The bound thread id passed for serialized parallels is 0 (second
    // argument); the first argument is the address of the real thread id.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the fork-call path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
2186 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2187 SourceLocation Loc) { 2188 if (auto *OMPRegionInfo = 2189 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2190 if (OMPRegionInfo->getThreadIDVariable()) 2191 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2192 2193 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2194 QualType Int32Ty = 2195 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2196 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2197 CGF.EmitStoreOfScalar(ThreadID, 2198 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2199 2200 return ThreadIDTemp; 2201 } 2202 2203 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2204 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2205 SmallString<256> Buffer; 2206 llvm::raw_svector_ostream Out(Buffer); 2207 Out << Name; 2208 StringRef RuntimeName = Out.str(); 2209 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2210 if (Elem.second) { 2211 assert(Elem.second->getType()->getPointerElementType() == Ty && 2212 "OMP internal variable has different type than requested"); 2213 return &*Elem.second; 2214 } 2215 2216 return Elem.second = new llvm::GlobalVariable( 2217 CGM.getModule(), Ty, /*IsConstant*/ false, 2218 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2219 Elem.first(), /*InsertBefore=*/nullptr, 2220 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2221 } 2222 2223 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2224 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2225 std::string Name = getName({Prefix, "var"}); 2226 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2227 } 2228 2229 namespace { 2230 /// Common pre(post)-action for different OpenMP constructs. 
2231 class CommonActionTy final : public PrePostActionTy { 2232 llvm::FunctionCallee EnterCallee; 2233 ArrayRef<llvm::Value *> EnterArgs; 2234 llvm::FunctionCallee ExitCallee; 2235 ArrayRef<llvm::Value *> ExitArgs; 2236 bool Conditional; 2237 llvm::BasicBlock *ContBlock = nullptr; 2238 2239 public: 2240 CommonActionTy(llvm::FunctionCallee EnterCallee, 2241 ArrayRef<llvm::Value *> EnterArgs, 2242 llvm::FunctionCallee ExitCallee, 2243 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2244 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2245 ExitArgs(ExitArgs), Conditional(Conditional) {} 2246 void Enter(CodeGenFunction &CGF) override { 2247 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2248 if (Conditional) { 2249 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2250 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2251 ContBlock = CGF.createBasicBlock("omp_if.end"); 2252 // Generate the branch (If-stmt) 2253 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2254 CGF.EmitBlock(ThenBlock); 2255 } 2256 } 2257 void Done(CodeGenFunction &CGF) { 2258 // Emit the rest of blocks/branches 2259 CGF.EmitBranch(ContBlock); 2260 CGF.EmitBlock(ContBlock, true); 2261 } 2262 void Exit(CodeGenFunction &CGF) override { 2263 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2264 } 2265 }; 2266 } // anonymous namespace 2267 2268 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2269 StringRef CriticalName, 2270 const RegionCodeGenTy &CriticalOpGen, 2271 SourceLocation Loc, const Expr *Hint) { 2272 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2273 // CriticalOpGen(); 2274 // __kmpc_end_critical(ident_t *, gtid, Lock); 2275 // Prepare arguments and build a call to __kmpc_critical 2276 if (!CGF.HaveInsertPoint()) 2277 return; 2278 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2279 getCriticalRegionLock(CriticalName)}; 2280 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  // The hint, when present, is appended to the enter arguments only; the
  // exit call keeps the three base arguments.
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit an OpenMP 'master' region: the code produced by \p MasterOpGen is
/// executed only when the __kmpc_master call returns a non-null value.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // The action is conditional, so the "omp_if.end" continuation block still
  // has to be emitted after the region.
  Action.Done(CGF);
}

/// Emit an OpenMP 'taskyield': through the OpenMPIRBuilder when the module
/// has one, otherwise as a direct __kmpc_omp_taskyield runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0,
/*isSigned=*/true)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Inside an OpenMP region, let the region info emit its untied-task switch
  // after the taskyield.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit an OpenMP 'taskgroup' region: the code produced by \p TaskgroupOpGen
/// is wrapped in the __kmpc_taskgroup / __kmpc_end_taskgroup runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // The same (loc, gtid) arguments are used for the enter and the exit call.
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded pointer: alignment comes from the declaration, the
  // element type from the variable's type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the internal helper "void copy_func(void *LHSArg, void *RHSArg)"
/// used for 'copyprivate'. Both arguments are cast to \p ArgsType and
/// treated as arrays of pointers to variables; for every entry of
/// \p AssignmentOps the helper copies the I-th source variable into the I-th
/// destination variable. Returns the generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper has internal linkage and is marked as non-recursive.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] =
*(Type0*)Src[0]; 2410 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2411 // ... 2412 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2413 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2414 const auto *DestVar = 2415 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2416 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2417 2418 const auto *SrcVar = 2419 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2420 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2421 2422 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2423 QualType Type = VD->getType(); 2424 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2425 } 2426 CGF.FinishFunction(); 2427 return Fn; 2428 } 2429 2430 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2431 const RegionCodeGenTy &SingleOpGen, 2432 SourceLocation Loc, 2433 ArrayRef<const Expr *> CopyprivateVars, 2434 ArrayRef<const Expr *> SrcExprs, 2435 ArrayRef<const Expr *> DstExprs, 2436 ArrayRef<const Expr *> AssignmentOps) { 2437 if (!CGF.HaveInsertPoint()) 2438 return; 2439 assert(CopyprivateVars.size() == SrcExprs.size() && 2440 CopyprivateVars.size() == DstExprs.size() && 2441 CopyprivateVars.size() == AssignmentOps.size()); 2442 ASTContext &C = CGM.getContext(); 2443 // int32 did_it = 0; 2444 // if(__kmpc_single(ident_t *, gtid)) { 2445 // SingleOpGen(); 2446 // __kmpc_end_single(ident_t *, gtid); 2447 // did_it = 1; 2448 // } 2449 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2450 // <copy_func>, did_it); 2451 2452 Address DidIt = Address::invalid(); 2453 if (!CopyprivateVars.empty()) { 2454 // int32 did_it = 0; 2455 QualType KmpInt32Ty = 2456 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2457 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2458 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2459 } 2460 // Prepare arguments and build a call to __kmpc_single 2461 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2462 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2463 CGM.getModule(), OMPRTL___kmpc_single), 2464 Args, 2465 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_end_single), 2467 Args, 2468 /*Conditional=*/true); 2469 SingleOpGen.setAction(Action); 2470 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2471 if (DidIt.isValid()) { 2472 // did_it = 1; 2473 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2474 } 2475 Action.Done(CGF); 2476 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2477 // <copy_func>, did_it); 2478 if (DidIt.isValid()) { 2479 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2480 QualType CopyprivateArrayTy = C.getConstantArrayType( 2481 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2482 /*IndexTypeQuals=*/0); 2483 // Create a list of all private variables for copyprivate. 2484 Address CopyprivateList = 2485 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2486 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2487 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2488 CGF.Builder.CreateStore( 2489 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2490 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2491 CGF.VoidPtrTy), 2492 Elem); 2493 } 2494 // Build function that copies private values from single region to all other 2495 // threads in the corresponding parallel region. 
2496 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2497 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2498 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2499 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2500 Address CL = 2501 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2502 CGF.VoidPtrTy); 2503 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2504 llvm::Value *Args[] = { 2505 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2506 getThreadID(CGF, Loc), // i32 <gtid> 2507 BufSize, // size_t <buf_size> 2508 CL.getPointer(), // void *<copyprivate list> 2509 CpyFn, // void (*) (void *, void *) <copy_func> 2510 DidItVal // i32 did_it 2511 }; 2512 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2513 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2514 Args); 2515 } 2516 } 2517 2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2519 const RegionCodeGenTy &OrderedOpGen, 2520 SourceLocation Loc, bool IsThreads) { 2521 if (!CGF.HaveInsertPoint()) 2522 return; 2523 // __kmpc_ordered(ident_t *, gtid); 2524 // OrderedOpGen(); 2525 // __kmpc_end_ordered(ident_t *, gtid); 2526 // Prepare arguments and build a call to __kmpc_ordered 2527 if (IsThreads) { 2528 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2529 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2530 CGM.getModule(), OMPRTL___kmpc_ordered), 2531 Args, 2532 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2533 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2534 Args); 2535 OrderedOpGen.setAction(Action); 2536 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2537 return; 2538 } 2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2540 } 2541 2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2543 unsigned Flags; 2544 if (Kind == OMPD_for) 2545 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2546 else if (Kind == 
OMPD_sections) 2547 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2548 else if (Kind == OMPD_single) 2549 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2550 else if (Kind == OMPD_barrier) 2551 Flags = OMP_IDENT_BARRIER_EXPL; 2552 else 2553 Flags = OMP_IDENT_BARRIER_IMPL; 2554 return Flags; 2555 } 2556 2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2558 CodeGenFunction &CGF, const OMPLoopDirective &S, 2559 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2560 // Check if the loop directive is actually a doacross loop directive. In this 2561 // case choose static, 1 schedule. 2562 if (llvm::any_of( 2563 S.getClausesOfKind<OMPOrderedClause>(), 2564 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2565 ScheduleKind = OMPC_SCHEDULE_static; 2566 // Chunk size is 1 in this case. 2567 llvm::APInt ChunkSize(32, 1); 2568 ChunkExpr = IntegerLiteral::Create( 2569 CGF.getContext(), ChunkSize, 2570 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2571 SourceLocation()); 2572 } 2573 } 2574 2575 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2576 OpenMPDirectiveKind Kind, bool EmitChecks, 2577 bool ForceSimpleCall) { 2578 // Check if we should use the OMPBuilder 2579 auto *OMPRegionInfo = 2580 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2581 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 2582 if (OMPBuilder) { 2583 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( 2584 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2585 return; 2586 } 2587 2588 if (!CGF.HaveInsertPoint()) 2589 return; 2590 // Build call __kmpc_cancel_barrier(loc, thread_id); 2591 // Build call __kmpc_barrier(loc, thread_id); 2592 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2593 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2594 // thread_id); 2595 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2596 getThreadID(CGF, Loc)}; 2597 if (OMPRegionInfo) { 2598 
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2599 llvm::Value *Result = CGF.EmitRuntimeCall( 2600 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2601 CGM.getModule(), OMPRTL___kmpc_cancel_barrier), 2602 Args); 2603 if (EmitChecks) { 2604 // if (__kmpc_cancel_barrier()) { 2605 // exit from construct; 2606 // } 2607 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2608 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2609 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2610 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2611 CGF.EmitBlock(ExitBB); 2612 // exit from construct; 2613 CodeGenFunction::JumpDest CancelDestination = 2614 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2615 CGF.EmitBranchThroughCleanup(CancelDestination); 2616 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2617 } 2618 return; 2619 } 2620 } 2621 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2622 CGM.getModule(), OMPRTL___kmpc_barrier), 2623 Args); 2624 } 2625 2626 /// Map the OpenMP loop schedule to the runtime enumeration. 2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2628 bool Chunked, bool Ordered) { 2629 switch (ScheduleKind) { 2630 case OMPC_SCHEDULE_static: 2631 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2632 : (Ordered ? OMP_ord_static : OMP_sch_static); 2633 case OMPC_SCHEDULE_dynamic: 2634 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2635 case OMPC_SCHEDULE_guided: 2636 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2637 case OMPC_SCHEDULE_runtime: 2638 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2639 case OMPC_SCHEDULE_auto: 2640 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2641 case OMPC_SCHEDULE_unknown: 2642 assert(!Chunked && "chunk was specified but schedule kind not known"); 2643 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2644 } 2645 llvm_unreachable("Unexpected runtime schedule"); 2646 } 2647 2648 /// Map the OpenMP distribute schedule to the runtime enumeration. 2649 static OpenMPSchedType 2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2651 // only static is allowed for dist_schedule 2652 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2653 } 2654 2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2656 bool Chunked) const { 2657 OpenMPSchedType Schedule = 2658 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2659 return Schedule == OMP_sch_static; 2660 } 2661 2662 bool CGOpenMPRuntime::isStaticNonchunked( 2663 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2664 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2665 return Schedule == OMP_dist_sch_static; 2666 } 2667 2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2669 bool Chunked) const { 2670 OpenMPSchedType Schedule = 2671 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2672 return Schedule == OMP_sch_static_chunked; 2673 } 2674 2675 bool CGOpenMPRuntime::isStaticChunked( 2676 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2677 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2678 return Schedule == OMP_dist_sch_static_chunked; 2679 } 2680 2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2682 OpenMPSchedType Schedule = 2683 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2684 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2685 return Schedule != OMP_sch_static; 2686 } 2687 2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2689 OpenMPScheduleClauseModifier M1, 2690 OpenMPScheduleClauseModifier M2) { 2691 int Modifier = 0; 2692 switch (M1) { 2693 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2694 Modifier = OMP_sch_modifier_monotonic; 2695 break; 2696 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2697 Modifier = OMP_sch_modifier_nonmonotonic; 2698 break; 2699 case OMPC_SCHEDULE_MODIFIER_simd: 2700 if (Schedule == OMP_sch_static_chunked) 2701 Schedule = OMP_sch_static_balanced_chunked; 2702 break; 2703 case OMPC_SCHEDULE_MODIFIER_last: 2704 case OMPC_SCHEDULE_MODIFIER_unknown: 2705 break; 2706 } 2707 switch (M2) { 2708 case OMPC_SCHEDULE_MODIFIER_monotonic: 2709 Modifier = OMP_sch_modifier_monotonic; 2710 break; 2711 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2712 Modifier = OMP_sch_modifier_nonmonotonic; 2713 break; 2714 case OMPC_SCHEDULE_MODIFIER_simd: 2715 if (Schedule == OMP_sch_static_chunked) 2716 Schedule = OMP_sch_static_balanced_chunked; 2717 break; 2718 case OMPC_SCHEDULE_MODIFIER_last: 2719 case OMPC_SCHEDULE_MODIFIER_unknown: 2720 break; 2721 } 2722 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2723 // If the static schedule kind is specified or if the ordered clause is 2724 // specified, and if the nonmonotonic modifier is not specified, the effect is 2725 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2726 // modifier is specified, the effect is as if the nonmonotonic modifier is 2727 // specified. 
2728 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2729 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2730 Schedule == OMP_sch_static_balanced_chunked || 2731 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2732 Schedule == OMP_dist_sch_static_chunked || 2733 Schedule == OMP_dist_sch_static)) 2734 Modifier = OMP_sch_modifier_nonmonotonic; 2735 } 2736 return Schedule | Modifier; 2737 } 2738 2739 void CGOpenMPRuntime::emitForDispatchInit( 2740 CodeGenFunction &CGF, SourceLocation Loc, 2741 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2742 bool Ordered, const DispatchRTInput &DispatchValues) { 2743 if (!CGF.HaveInsertPoint()) 2744 return; 2745 OpenMPSchedType Schedule = getRuntimeSchedule( 2746 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2747 assert(Ordered || 2748 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2749 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2750 Schedule != OMP_sch_static_balanced_chunked)); 2751 // Call __kmpc_dispatch_init( 2752 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2753 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2754 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2755 2756 // If the Chunk was not specified in the clause - use default value 1. 2757 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2758 : CGF.Builder.getIntN(IVSize, 1); 2759 llvm::Value *Args[] = { 2760 emitUpdateLocation(CGF, Loc), 2761 getThreadID(CGF, Loc), 2762 CGF.Builder.getInt32(addMonoNonMonoModifier( 2763 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2764 DispatchValues.LB, // Lower 2765 DispatchValues.UB, // Upper 2766 CGF.Builder.getIntN(IVSize, 1), // Stride 2767 Chunk // Chunk 2768 }; 2769 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2770 } 2771 2772 static void emitForStaticInitCall( 2773 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2774 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2775 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2776 const CGOpenMPRuntime::StaticRTInput &Values) { 2777 if (!CGF.HaveInsertPoint()) 2778 return; 2779 2780 assert(!Values.Ordered); 2781 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2782 Schedule == OMP_sch_static_balanced_chunked || 2783 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2784 Schedule == OMP_dist_sch_static || 2785 Schedule == OMP_dist_sch_static_chunked); 2786 2787 // Call __kmpc_for_static_init( 2788 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2789 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2790 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2791 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2792 llvm::Value *Chunk = Values.Chunk; 2793 if (Chunk == nullptr) { 2794 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2795 Schedule == OMP_dist_sch_static) && 2796 "expected static non-chunked schedule"); 2797 // If the Chunk was not specified in the clause - use default value 1. 
2798 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2799 } else { 2800 assert((Schedule == OMP_sch_static_chunked || 2801 Schedule == OMP_sch_static_balanced_chunked || 2802 Schedule == OMP_ord_static_chunked || 2803 Schedule == OMP_dist_sch_static_chunked) && 2804 "expected static chunked schedule"); 2805 } 2806 llvm::Value *Args[] = { 2807 UpdateLocation, 2808 ThreadId, 2809 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2810 M2)), // Schedule type 2811 Values.IL.getPointer(), // &isLastIter 2812 Values.LB.getPointer(), // &LB 2813 Values.UB.getPointer(), // &UB 2814 Values.ST.getPointer(), // &Stride 2815 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2816 Chunk // Chunk 2817 }; 2818 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2819 } 2820 2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2822 SourceLocation Loc, 2823 OpenMPDirectiveKind DKind, 2824 const OpenMPScheduleTy &ScheduleKind, 2825 const StaticRTInput &Values) { 2826 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2827 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2828 assert(isOpenMPWorksharingDirective(DKind) && 2829 "Expected loop-based or sections-based directive."); 2830 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2831 isOpenMPLoopDirective(DKind) 2832 ? 
OMP_IDENT_WORK_LOOP 2833 : OMP_IDENT_WORK_SECTIONS); 2834 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2835 llvm::FunctionCallee StaticInitFunction = 2836 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2837 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2838 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2839 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2840 } 2841 2842 void CGOpenMPRuntime::emitDistributeStaticInit( 2843 CodeGenFunction &CGF, SourceLocation Loc, 2844 OpenMPDistScheduleClauseKind SchedKind, 2845 const CGOpenMPRuntime::StaticRTInput &Values) { 2846 OpenMPSchedType ScheduleNum = 2847 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2848 llvm::Value *UpdatedLocation = 2849 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2850 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2851 llvm::FunctionCallee StaticInitFunction = 2852 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2853 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2854 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2855 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2856 } 2857 2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2859 SourceLocation Loc, 2860 OpenMPDirectiveKind DKind) { 2861 if (!CGF.HaveInsertPoint()) 2862 return; 2863 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2864 llvm::Value *Args[] = { 2865 emitUpdateLocation(CGF, Loc, 2866 isOpenMPDistributeDirective(DKind) 2867 ? OMP_IDENT_WORK_DISTRIBUTE 2868 : isOpenMPLoopDirective(DKind) 2869 ? 
OMP_IDENT_WORK_LOOP 2870 : OMP_IDENT_WORK_SECTIONS), 2871 getThreadID(CGF, Loc)}; 2872 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2873 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2874 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2875 Args); 2876 } 2877 2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2879 SourceLocation Loc, 2880 unsigned IVSize, 2881 bool IVSigned) { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2885 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2886 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2887 } 2888 2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2890 SourceLocation Loc, unsigned IVSize, 2891 bool IVSigned, Address IL, 2892 Address LB, Address UB, 2893 Address ST) { 2894 // Call __kmpc_dispatch_next( 2895 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2896 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2897 // kmp_int[32|64] *p_stride); 2898 llvm::Value *Args[] = { 2899 emitUpdateLocation(CGF, Loc), 2900 getThreadID(CGF, Loc), 2901 IL.getPointer(), // &isLastIter 2902 LB.getPointer(), // &Lower 2903 UB.getPointer(), // &Upper 2904 ST.getPointer() // &Stride 2905 }; 2906 llvm::Value *Call = 2907 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2908 return CGF.EmitScalarConversion( 2909 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2910 CGF.getContext().BoolTy, Loc); 2911 } 2912 2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2914 llvm::Value *NumThreads, 2915 SourceLocation Loc) { 2916 if (!CGF.HaveInsertPoint()) 2917 return; 2918 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2919 llvm::Value *Args[] = { 2920 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2921 CGF.Builder.CreateIntCast(NumThreads, 
CGF.Int32Ty, /*isSigned*/ true)}; 2922 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2923 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2924 Args); 2925 } 2926 2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2928 ProcBindKind ProcBind, 2929 SourceLocation Loc) { 2930 if (!CGF.HaveInsertPoint()) 2931 return; 2932 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2933 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2934 llvm::Value *Args[] = { 2935 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2936 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2937 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2938 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2939 Args); 2940 } 2941 2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2943 SourceLocation Loc, llvm::AtomicOrdering AO) { 2944 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 2945 if (OMPBuilder) { 2946 OMPBuilder->CreateFlush(CGF.Builder); 2947 } else { 2948 if (!CGF.HaveInsertPoint()) 2949 return; 2950 // Build call void __kmpc_flush(ident_t *loc) 2951 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2952 CGM.getModule(), OMPRTL___kmpc_flush), 2953 emitUpdateLocation(CGF, Loc)); 2954 } 2955 } 2956 2957 namespace { 2958 /// Indexes of fields for type kmp_task_t. 2959 enum KmpTaskTFields { 2960 /// List of shared variables. 2961 KmpTaskTShareds, 2962 /// Task routine. 2963 KmpTaskTRoutine, 2964 /// Partition id for the untied tasks. 2965 KmpTaskTPartId, 2966 /// Function with call of destructors for private variables. 2967 Data1, 2968 /// Task priority. 2969 Data2, 2970 /// (Taskloops only) Lower bound. 2971 KmpTaskTLowerBound, 2972 /// (Taskloops only) Upper bound. 2973 KmpTaskTUpperBound, 2974 /// (Taskloops only) Stride. 2975 KmpTaskTStride, 2976 /// (Taskloops only) Is last iteration flag. 
2977 KmpTaskTLastIter, 2978 /// (Taskloops only) Reduction data. 2979 KmpTaskTReductions, 2980 }; 2981 } // anonymous namespace 2982 2983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2984 return OffloadEntriesTargetRegion.empty() && 2985 OffloadEntriesDeviceGlobalVar.empty(); 2986 } 2987 2988 /// Initialize target region entry. 2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2990 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2991 StringRef ParentName, unsigned LineNum, 2992 unsigned Order) { 2993 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2994 "only required for the device " 2995 "code generation."); 2996 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2997 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2998 OMPTargetRegionEntryTargetRegion); 2999 ++OffloadingEntriesNum; 3000 } 3001 3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3003 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3004 StringRef ParentName, unsigned LineNum, 3005 llvm::Constant *Addr, llvm::Constant *ID, 3006 OMPTargetRegionEntryKind Flags) { 3007 // If we are emitting code for a target, the entry is already initialized, 3008 // only has to be registered. 
3009 if (CGM.getLangOpts().OpenMPIsDevice) { 3010 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3011 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3012 DiagnosticsEngine::Error, 3013 "Unable to find target region on line '%0' in the device code."); 3014 CGM.getDiags().Report(DiagID) << LineNum; 3015 return; 3016 } 3017 auto &Entry = 3018 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3019 assert(Entry.isValid() && "Entry not initialized!"); 3020 Entry.setAddress(Addr); 3021 Entry.setID(ID); 3022 Entry.setFlags(Flags); 3023 } else { 3024 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3025 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3026 ++OffloadingEntriesNum; 3027 } 3028 } 3029 3030 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3031 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3032 unsigned LineNum) const { 3033 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3034 if (PerDevice == OffloadEntriesTargetRegion.end()) 3035 return false; 3036 auto PerFile = PerDevice->second.find(FileID); 3037 if (PerFile == PerDevice->second.end()) 3038 return false; 3039 auto PerParentName = PerFile->second.find(ParentName); 3040 if (PerParentName == PerFile->second.end()) 3041 return false; 3042 auto PerLine = PerParentName->second.find(LineNum); 3043 if (PerLine == PerParentName->second.end()) 3044 return false; 3045 // Fail if this entry is already registered. 3046 if (PerLine->second.getAddress() || PerLine->second.getID()) 3047 return false; 3048 return true; 3049 } 3050 3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3052 const OffloadTargetRegionEntryInfoActTy &Action) { 3053 // Scan all target region entries and perform the provided action. 
3054 for (const auto &D : OffloadEntriesTargetRegion) 3055 for (const auto &F : D.second) 3056 for (const auto &P : F.second) 3057 for (const auto &L : P.second) 3058 Action(D.first, F.first, P.first(), L.first, L.second); 3059 } 3060 3061 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3062 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3063 OMPTargetGlobalVarEntryKind Flags, 3064 unsigned Order) { 3065 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3066 "only required for the device " 3067 "code generation."); 3068 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3069 ++OffloadingEntriesNum; 3070 } 3071 3072 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3073 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3074 CharUnits VarSize, 3075 OMPTargetGlobalVarEntryKind Flags, 3076 llvm::GlobalValue::LinkageTypes Linkage) { 3077 if (CGM.getLangOpts().OpenMPIsDevice) { 3078 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3079 assert(Entry.isValid() && Entry.getFlags() == Flags && 3080 "Entry not initialized!"); 3081 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3082 "Resetting with the new address."); 3083 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3084 if (Entry.getVarSize().isZero()) { 3085 Entry.setVarSize(VarSize); 3086 Entry.setLinkage(Linkage); 3087 } 3088 return; 3089 } 3090 Entry.setVarSize(VarSize); 3091 Entry.setLinkage(Linkage); 3092 Entry.setAddress(Addr); 3093 } else { 3094 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3095 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3096 assert(Entry.isValid() && Entry.getFlags() == Flags && 3097 "Entry not initialized!"); 3098 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3099 "Resetting with the new address."); 3100 if (Entry.getVarSize().isZero()) { 3101 Entry.setVarSize(VarSize); 3102 Entry.setLinkage(Linkage); 3103 } 3104 return; 3105 } 3106 
OffloadEntriesDeviceGlobalVar.try_emplace( 3107 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3108 ++OffloadingEntriesNum; 3109 } 3110 } 3111 3112 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3113 actOnDeviceGlobalVarEntriesInfo( 3114 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3115 // Scan all target region entries and perform the provided action. 3116 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3117 Action(E.getKey(), E.getValue()); 3118 } 3119 3120 void CGOpenMPRuntime::createOffloadEntry( 3121 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3122 llvm::GlobalValue::LinkageTypes Linkage) { 3123 StringRef Name = Addr->getName(); 3124 llvm::Module &M = CGM.getModule(); 3125 llvm::LLVMContext &C = M.getContext(); 3126 3127 // Create constant string with the name. 3128 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3129 3130 std::string StringName = getName({"omp_offloading", "entry_name"}); 3131 auto *Str = new llvm::GlobalVariable( 3132 M, StrPtrInit->getType(), /*isConstant=*/true, 3133 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3134 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3135 3136 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3137 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3138 llvm::ConstantInt::get(CGM.SizeTy, Size), 3139 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3140 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3141 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3142 llvm::GlobalVariable *Entry = createGlobalStruct( 3143 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3144 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3145 3146 // The entry has to be created in the section the linker expects it to be. 
3147 Entry->setSection("omp_offloading_entries"); 3148 } 3149 3150 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3151 // Emit the offloading entries and metadata so that the device codegen side 3152 // can easily figure out what to emit. The produced metadata looks like 3153 // this: 3154 // 3155 // !omp_offload.info = !{!1, ...} 3156 // 3157 // Right now we only generate metadata for function that contain target 3158 // regions. 3159 3160 // If we are in simd mode or there are no entries, we don't need to do 3161 // anything. 3162 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3163 return; 3164 3165 llvm::Module &M = CGM.getModule(); 3166 llvm::LLVMContext &C = M.getContext(); 3167 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3168 SourceLocation, StringRef>, 3169 16> 3170 OrderedEntries(OffloadEntriesInfoManager.size()); 3171 llvm::SmallVector<StringRef, 16> ParentFunctions( 3172 OffloadEntriesInfoManager.size()); 3173 3174 // Auxiliary methods to create metadata values and strings. 3175 auto &&GetMDInt = [this](unsigned V) { 3176 return llvm::ConstantAsMetadata::get( 3177 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3178 }; 3179 3180 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3181 3182 // Create the offloading info metadata node. 3183 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3184 3185 // Create function that emits metadata for each target region entry; 3186 auto &&TargetRegionMetadataEmitter = 3187 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3188 &GetMDString]( 3189 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3190 unsigned Line, 3191 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3192 // Generate metadata for target regions. Each entry of this metadata 3193 // contains: 3194 // - Entry 0 -> Kind of this type of metadata (0). 
3195 // - Entry 1 -> Device ID of the file where the entry was identified. 3196 // - Entry 2 -> File ID of the file where the entry was identified. 3197 // - Entry 3 -> Mangled name of the function where the entry was 3198 // identified. 3199 // - Entry 4 -> Line in the file where the entry was identified. 3200 // - Entry 5 -> Order the entry was created. 3201 // The first element of the metadata node is the kind. 3202 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3203 GetMDInt(FileID), GetMDString(ParentName), 3204 GetMDInt(Line), GetMDInt(E.getOrder())}; 3205 3206 SourceLocation Loc; 3207 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3208 E = CGM.getContext().getSourceManager().fileinfo_end(); 3209 I != E; ++I) { 3210 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3211 I->getFirst()->getUniqueID().getFile() == FileID) { 3212 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3213 I->getFirst(), Line, 1); 3214 break; 3215 } 3216 } 3217 // Save this entry in the right position of the ordered entries array. 3218 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3219 ParentFunctions[E.getOrder()] = ParentName; 3220 3221 // Add metadata to the named metadata node. 3222 MD->addOperand(llvm::MDNode::get(C, Ops)); 3223 }; 3224 3225 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3226 TargetRegionMetadataEmitter); 3227 3228 // Create function that emits metadata for each device global variable entry; 3229 auto &&DeviceGlobalVarMetadataEmitter = 3230 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3231 MD](StringRef MangledName, 3232 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3233 &E) { 3234 // Generate metadata for global variables. Each entry of this metadata 3235 // contains: 3236 // - Entry 0 -> Kind of this type of metadata (1). 3237 // - Entry 1 -> Mangled name of the variable. 3238 // - Entry 2 -> Declare target kind. 
3239 // - Entry 3 -> Order the entry was created. 3240 // The first element of the metadata node is the kind. 3241 llvm::Metadata *Ops[] = { 3242 GetMDInt(E.getKind()), GetMDString(MangledName), 3243 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3244 3245 // Save this entry in the right position of the ordered entries array. 3246 OrderedEntries[E.getOrder()] = 3247 std::make_tuple(&E, SourceLocation(), MangledName); 3248 3249 // Add metadata to the named metadata node. 3250 MD->addOperand(llvm::MDNode::get(C, Ops)); 3251 }; 3252 3253 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3254 DeviceGlobalVarMetadataEmitter); 3255 3256 for (const auto &E : OrderedEntries) { 3257 assert(std::get<0>(E) && "All ordered entries must exist!"); 3258 if (const auto *CE = 3259 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3260 std::get<0>(E))) { 3261 if (!CE->getID() || !CE->getAddress()) { 3262 // Do not blame the entry if the parent funtion is not emitted. 3263 StringRef FnName = ParentFunctions[CE->getOrder()]; 3264 if (!CGM.GetGlobalValue(FnName)) 3265 continue; 3266 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3267 DiagnosticsEngine::Error, 3268 "Offloading entry for target region in %0 is incorrect: either the " 3269 "address or the ID is invalid."); 3270 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3271 continue; 3272 } 3273 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3274 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3275 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3276 OffloadEntryInfoDeviceGlobalVar>( 3277 std::get<0>(E))) { 3278 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3279 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3280 CE->getFlags()); 3281 switch (Flags) { 3282 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3283 if (CGM.getLangOpts().OpenMPIsDevice && 3284 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3285 continue; 3286 if (!CE->getAddress()) { 3287 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3288 DiagnosticsEngine::Error, "Offloading entry for declare target " 3289 "variable %0 is incorrect: the " 3290 "address is invalid."); 3291 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3292 continue; 3293 } 3294 // The vaiable has no definition - no need to add the entry. 3295 if (CE->getVarSize().isZero()) 3296 continue; 3297 break; 3298 } 3299 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3300 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3301 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3302 "Declaret target link address is set."); 3303 if (CGM.getLangOpts().OpenMPIsDevice) 3304 continue; 3305 if (!CE->getAddress()) { 3306 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3307 DiagnosticsEngine::Error, 3308 "Offloading entry for declare target variable is incorrect: the " 3309 "address is invalid."); 3310 CGM.getDiags().Report(DiagID); 3311 continue; 3312 } 3313 break; 3314 } 3315 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3316 CE->getVarSize().getQuantity(), Flags, 3317 CE->getLinkage()); 3318 } else { 3319 llvm_unreachable("Unsupported entry kind."); 3320 } 3321 } 3322 } 3323 3324 /// Loads all the offload entries information from the host IR 3325 /// metadata. 3326 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3327 // If we are in target mode, load the metadata from the host IR. This code has 3328 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3329 3330 if (!CGM.getLangOpts().OpenMPIsDevice) 3331 return; 3332 3333 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3334 return; 3335 3336 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3337 if (auto EC = Buf.getError()) { 3338 CGM.getDiags().Report(diag::err_cannot_open_file) 3339 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3340 return; 3341 } 3342 3343 llvm::LLVMContext C; 3344 auto ME = expectedToErrorOrAndEmitErrors( 3345 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3346 3347 if (auto EC = ME.getError()) { 3348 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3349 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3350 CGM.getDiags().Report(DiagID) 3351 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3352 return; 3353 } 3354 3355 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3356 if (!MD) 3357 return; 3358 3359 for (llvm::MDNode *MN : MD->operands()) { 3360 auto &&GetMDInt = [MN](unsigned Idx) { 3361 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3362 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3363 }; 3364 3365 auto &&GetMDString = [MN](unsigned Idx) { 3366 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3367 return V->getString(); 3368 }; 3369 3370 switch (GetMDInt(0)) { 3371 default: 3372 llvm_unreachable("Unexpected metadata!"); 3373 break; 3374 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3375 OffloadingEntryInfoTargetRegion: 3376 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3377 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3378 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3379 /*Order=*/GetMDInt(5)); 3380 break; 3381 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3382 OffloadingEntryInfoDeviceGlobalVar: 3383 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3384 /*MangledName=*/GetMDString(1), 3385 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3386 /*Flags=*/GetMDInt(2)), 3387 /*Order=*/GetMDInt(3)); 3388 break; 3389 } 3390 } 3391 } 3392 3393 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3394 if (!KmpRoutineEntryPtrTy) { 3395 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3396 ASTContext &C = CGM.getContext(); 3397 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3398 FunctionProtoType::ExtProtoInfo EPI; 3399 KmpRoutineEntryPtrQTy = C.getPointerType( 3400 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3401 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3402 } 3403 } 3404 3405 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3406 // Make sure the type of the entry is already created. This is the type we 3407 // have to create: 3408 // struct __tgt_offload_entry{ 3409 // void *addr; // Pointer to the offload entry info. 3410 // // (function or global) 3411 // char *name; // Name of the function or global. 3412 // size_t size; // Size of the entry info (0 if it a function). 3413 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3414 // int32_t reserved; // Reserved, to use by the runtime library. 
3415 // }; 3416 if (TgtOffloadEntryQTy.isNull()) { 3417 ASTContext &C = CGM.getContext(); 3418 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3419 RD->startDefinition(); 3420 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3421 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3422 addFieldToRecordDecl(C, RD, C.getSizeType()); 3423 addFieldToRecordDecl( 3424 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3425 addFieldToRecordDecl( 3426 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3427 RD->completeDefinition(); 3428 RD->addAttr(PackedAttr::CreateImplicit(C)); 3429 TgtOffloadEntryQTy = C.getRecordType(RD); 3430 } 3431 return TgtOffloadEntryQTy; 3432 } 3433 3434 namespace { 3435 struct PrivateHelpersTy { 3436 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3437 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3438 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3439 PrivateElemInit(PrivateElemInit) {} 3440 const Expr *OriginalRef = nullptr; 3441 const VarDecl *Original = nullptr; 3442 const VarDecl *PrivateCopy = nullptr; 3443 const VarDecl *PrivateElemInit = nullptr; 3444 }; 3445 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3446 } // anonymous namespace 3447 3448 static RecordDecl * 3449 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3450 if (!Privates.empty()) { 3451 ASTContext &C = CGM.getContext(); 3452 // Build struct .kmp_privates_t. 
{ 3453 // /* private vars */ 3454 // }; 3455 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3456 RD->startDefinition(); 3457 for (const auto &Pair : Privates) { 3458 const VarDecl *VD = Pair.second.Original; 3459 QualType Type = VD->getType().getNonReferenceType(); 3460 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3461 if (VD->hasAttrs()) { 3462 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3463 E(VD->getAttrs().end()); 3464 I != E; ++I) 3465 FD->addAttr(*I); 3466 } 3467 } 3468 RD->completeDefinition(); 3469 return RD; 3470 } 3471 return nullptr; 3472 } 3473 3474 static RecordDecl * 3475 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3476 QualType KmpInt32Ty, 3477 QualType KmpRoutineEntryPointerQTy) { 3478 ASTContext &C = CGM.getContext(); 3479 // Build struct kmp_task_t { 3480 // void * shareds; 3481 // kmp_routine_entry_t routine; 3482 // kmp_int32 part_id; 3483 // kmp_cmplrdata_t data1; 3484 // kmp_cmplrdata_t data2; 3485 // For taskloops additional fields: 3486 // kmp_uint64 lb; 3487 // kmp_uint64 ub; 3488 // kmp_int64 st; 3489 // kmp_int32 liter; 3490 // void * reductions; 3491 // }; 3492 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3493 UD->startDefinition(); 3494 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3495 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3496 UD->completeDefinition(); 3497 QualType KmpCmplrdataTy = C.getRecordType(UD); 3498 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3499 RD->startDefinition(); 3500 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3501 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3502 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3503 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3504 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3505 if (isOpenMPTaskLoopDirective(Kind)) { 3506 QualType KmpUInt64Ty = 3507 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3508 QualType KmpInt64Ty = 3509 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3510 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3511 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3512 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3513 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3514 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3515 } 3516 RD->completeDefinition(); 3517 return RD; 3518 } 3519 3520 static RecordDecl * 3521 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3522 ArrayRef<PrivateDataTy> Privates) { 3523 ASTContext &C = CGM.getContext(); 3524 // Build struct kmp_task_t_with_privates { 3525 // kmp_task_t task_data; 3526 // .kmp_privates_t. privates; 3527 // }; 3528 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3529 RD->startDefinition(); 3530 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3531 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3532 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3533 RD->completeDefinition(); 3534 return RD; 3535 } 3536 3537 /// Emit a proxy function which accepts kmp_task_t as the second 3538 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt,
///   For taskloops:
///                tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///                tt->task_data.liter, tt->task_data.reductions,
///                tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field. The taskloop-only arguments are passed only when \p Kind
/// is a taskloop directive.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole task-with-privates object; Base is its first field,
  // the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the caller-supplied pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is the task object
  // itself as void *.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop-only fields are loaded by value from the embedded kmp_task_t.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the final argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function taking (kmp_int32 gtid, task-with-privates *)
/// that pushes a destroy cleanup for every field of the privates record whose
/// type needs destruction.
/// NOTE(review): this reads the second field of the wrapper record without
/// checking that it exists - presumably callers only invoke it when the task
/// has privates; confirm against the call site.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and rebase onto it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // isDestructedType() yields a non-zero kind only when destruction is
    // needed.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1> 3705 /// **noalias priv1,..., <tyn> **noalias privn) { 3706 /// *priv1 = &.privates.priv1; 3707 /// ...; 3708 /// *privn = &.privates.privn; 3709 /// } 3710 /// \endcode 3711 static llvm::Value * 3712 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3713 ArrayRef<const Expr *> PrivateVars, 3714 ArrayRef<const Expr *> FirstprivateVars, 3715 ArrayRef<const Expr *> LastprivateVars, 3716 QualType PrivatesQTy, 3717 ArrayRef<PrivateDataTy> Privates) { 3718 ASTContext &C = CGM.getContext(); 3719 FunctionArgList Args; 3720 ImplicitParamDecl TaskPrivatesArg( 3721 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3722 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3723 ImplicitParamDecl::Other); 3724 Args.push_back(&TaskPrivatesArg); 3725 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3726 unsigned Counter = 1; 3727 for (const Expr *E : PrivateVars) { 3728 Args.push_back(ImplicitParamDecl::Create( 3729 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3730 C.getPointerType(C.getPointerType(E->getType())) 3731 .withConst() 3732 .withRestrict(), 3733 ImplicitParamDecl::Other)); 3734 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3735 PrivateVarsPos[VD] = Counter; 3736 ++Counter; 3737 } 3738 for (const Expr *E : FirstprivateVars) { 3739 Args.push_back(ImplicitParamDecl::Create( 3740 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3741 C.getPointerType(C.getPointerType(E->getType())) 3742 .withConst() 3743 .withRestrict(), 3744 ImplicitParamDecl::Other)); 3745 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3746 PrivateVarsPos[VD] = Counter; 3747 ++Counter; 3748 } 3749 for (const Expr *E : LastprivateVars) { 3750 Args.push_back(ImplicitParamDecl::Create( 3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3752 C.getPointerType(C.getPointerType(E->getType())) 3753 .withConst() 3754 .withRestrict(), 3755 ImplicitParamDecl::Other)); 3756 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3757 
PrivateVarsPos[VD] = Counter; 3758 ++Counter; 3759 } 3760 const auto &TaskPrivatesMapFnInfo = 3761 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3762 llvm::FunctionType *TaskPrivatesMapTy = 3763 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3764 std::string Name = 3765 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3766 auto *TaskPrivatesMap = llvm::Function::Create( 3767 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3768 &CGM.getModule()); 3769 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3770 TaskPrivatesMapFnInfo); 3771 if (CGM.getLangOpts().Optimize) { 3772 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3773 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3774 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3775 } 3776 CodeGenFunction CGF(CGM); 3777 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3778 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3779 3780 // *privi = &.privates.privi; 3781 LValue Base = CGF.EmitLoadOfPointerLValue( 3782 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3783 TaskPrivatesArg.getType()->castAs<PointerType>()); 3784 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3785 Counter = 0; 3786 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3787 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3788 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3789 LValue RefLVal = 3790 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3791 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3792 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3793 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3794 ++Counter; 3795 } 3796 CGF.FinishFunction(); 3797 return TaskPrivatesMap; 3798 } 3799 3800 /// Emit initialization for private variables in task-based directives. 
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) inside \p TDBase and emits
/// the initializer of each private copy into its field.
/// \param KmpTaskSharedsPtr address of the shareds block; only read when a
/// source base is needed (task_dup with firstprivates, or a target task with
/// a valid pointer).
/// \param ForDup true when emitting into the task_dup function; then only
/// non-trivial constructor initializers are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first points at the privates field of the wrapper record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates the fields of the privates record itself; it is
  // advanced once per entry of Privates, whether or not that entry is
  // initialized.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside task_dup every initializer is emitted; inside task_dup only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // variable, so its address has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds block, then rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted inside a block: emit
          // the original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind the helper variable to the original's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// \return true if at least one private copy has a constructor-call
/// initializer that is not trivial.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    // One qualifying initializer is enough; stop scanning.
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
3941 /// } 3942 /// \endcode 3943 static llvm::Value * 3944 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3945 const OMPExecutableDirective &D, 3946 QualType KmpTaskTWithPrivatesPtrQTy, 3947 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3948 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3949 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3950 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3951 ASTContext &C = CGM.getContext(); 3952 FunctionArgList Args; 3953 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3954 KmpTaskTWithPrivatesPtrQTy, 3955 ImplicitParamDecl::Other); 3956 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3957 KmpTaskTWithPrivatesPtrQTy, 3958 ImplicitParamDecl::Other); 3959 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3960 ImplicitParamDecl::Other); 3961 Args.push_back(&DstArg); 3962 Args.push_back(&SrcArg); 3963 Args.push_back(&LastprivArg); 3964 const auto &TaskDupFnInfo = 3965 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3966 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3967 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3968 auto *TaskDup = llvm::Function::Create( 3969 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3970 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3971 TaskDup->setDoesNotRecurse(); 3972 CodeGenFunction CGF(CGM); 3973 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3974 Loc); 3975 3976 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3977 CGF.GetAddrOfLocalVar(&DstArg), 3978 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3979 // task_dst->liter = lastpriv; 3980 if (WithLastIter) { 3981 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3982 LValue Base = CGF.EmitLValueForField( 3983 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3984 LValue LILVal 
= CGF.EmitLValueForField(Base, *LIFI); 3985 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3986 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3987 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3988 } 3989 3990 // Emit initial values for private copies (if any). 3991 assert(!Privates.empty()); 3992 Address KmpTaskSharedsPtr = Address::invalid(); 3993 if (!Data.FirstprivateVars.empty()) { 3994 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3995 CGF.GetAddrOfLocalVar(&SrcArg), 3996 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3997 LValue Base = CGF.EmitLValueForField( 3998 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3999 KmpTaskSharedsPtr = Address( 4000 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4001 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4002 KmpTaskTShareds)), 4003 Loc), 4004 CGM.getNaturalTypeAlignment(SharedsTy)); 4005 } 4006 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4007 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4008 CGF.FinishFunction(); 4009 return TaskDup; 4010 } 4011 4012 /// Checks if destructor function is required to be generated. 4013 /// \return true if cleanups are required, false otherwise. 4014 static bool 4015 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4016 bool NeedsCleanup = false; 4017 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4018 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4019 for (const FieldDecl *FD : PrivateRD->fields()) { 4020 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4021 if (NeedsCleanup) 4022 break; 4023 } 4024 return NeedsCleanup; 4025 } 4026 4027 namespace { 4028 /// Loop generator for OpenMP iterator expression. 
4029 class OMPIteratorGeneratorScope final 4030 : public CodeGenFunction::OMPPrivateScope { 4031 CodeGenFunction &CGF; 4032 const OMPIteratorExpr *E = nullptr; 4033 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4034 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4035 OMPIteratorGeneratorScope() = delete; 4036 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4037 4038 public: 4039 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4040 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4041 if (!E) 4042 return; 4043 SmallVector<llvm::Value *, 4> Uppers; 4044 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4045 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4046 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4047 addPrivate(VD, [&CGF, VD]() { 4048 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4049 }); 4050 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4051 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4052 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4053 "counter.addr"); 4054 }); 4055 } 4056 Privatize(); 4057 4058 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4059 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4060 LValue CLVal = 4061 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4062 HelperData.CounterVD->getType()); 4063 // Counter = 0; 4064 CGF.EmitStoreOfScalar( 4065 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4066 CLVal); 4067 CodeGenFunction::JumpDest &ContDest = 4068 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4069 CodeGenFunction::JumpDest &ExitDest = 4070 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4071 // N = <number-of_iterations>; 4072 llvm::Value *N = Uppers[I]; 4073 // cont: 4074 // if (Counter < N) goto body; else goto exit; 4075 CGF.EmitBlock(ContDest.getBlock()); 4076 auto *CVal = 4077 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4078 llvm::Value *Cmp = 4079 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4080 ? CGF.Builder.CreateICmpSLT(CVal, N) 4081 : CGF.Builder.CreateICmpULT(CVal, N); 4082 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4083 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4084 // body: 4085 CGF.EmitBlock(BodyBB); 4086 // Iteri = Begini + Counter * Stepi; 4087 CGF.EmitIgnoredExpr(HelperData.Update); 4088 } 4089 } 4090 ~OMPIteratorGeneratorScope() { 4091 if (!E) 4092 return; 4093 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4094 // Counter = Counter + 1; 4095 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4096 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4097 // goto cont; 4098 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4099 // exit: 4100 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4101 } 4102 } 4103 }; 4104 } // namespace 4105 4106 static std::pair<llvm::Value *, llvm::Value *> 4107 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4108 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4109 llvm::Value *Addr; 4110 if (OASE) { 4111 const Expr *Base = OASE->getBase(); 4112 Addr = CGF.EmitScalarExpr(Base); 4113 } else { 4114 Addr = CGF.EmitLValue(E).getPointer(CGF); 4115 } 4116 llvm::Value *SizeVal; 4117 QualType Ty = E->getType(); 4118 if (OASE) { 4119 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4120 for (const Expr *SE : OASE->getDimensions()) { 4121 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4122 Sz = CGF.EmitScalarConversion( 4123 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4124 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4125 } 4126 } else if (const auto *ASE = 4127 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4128 LValue UpAddrLVal = 4129 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4130 llvm::Value *UpAddr = 
4131 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4132 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4133 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4134 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4135 } else { 4136 SizeVal = CGF.getTypeSize(Ty); 4137 } 4138 return std::make_pair(Addr, SizeVal); 4139 } 4140 4141 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4142 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4143 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4144 if (KmpTaskAffinityInfoTy.isNull()) { 4145 RecordDecl *KmpAffinityInfoRD = 4146 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4147 KmpAffinityInfoRD->startDefinition(); 4148 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4149 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4150 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4151 KmpAffinityInfoRD->completeDefinition(); 4152 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4153 } 4154 } 4155 4156 CGOpenMPRuntime::TaskResultTy 4157 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4158 const OMPExecutableDirective &D, 4159 llvm::Function *TaskFunction, QualType SharedsTy, 4160 Address Shareds, const OMPTaskDataTy &Data) { 4161 ASTContext &C = CGM.getContext(); 4162 llvm::SmallVector<PrivateDataTy, 4> Privates; 4163 // Aggregate privates and sort them by the alignment. 
4164 const auto *I = Data.PrivateCopies.begin(); 4165 for (const Expr *E : Data.PrivateVars) { 4166 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4167 Privates.emplace_back( 4168 C.getDeclAlign(VD), 4169 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4170 /*PrivateElemInit=*/nullptr)); 4171 ++I; 4172 } 4173 I = Data.FirstprivateCopies.begin(); 4174 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4175 for (const Expr *E : Data.FirstprivateVars) { 4176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4177 Privates.emplace_back( 4178 C.getDeclAlign(VD), 4179 PrivateHelpersTy( 4180 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4181 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4182 ++I; 4183 ++IElemInitRef; 4184 } 4185 I = Data.LastprivateCopies.begin(); 4186 for (const Expr *E : Data.LastprivateVars) { 4187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4188 Privates.emplace_back( 4189 C.getDeclAlign(VD), 4190 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4191 /*PrivateElemInit=*/nullptr)); 4192 ++I; 4193 } 4194 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4195 return L.first > R.first; 4196 }); 4197 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4198 // Build type kmp_routine_entry_t (if not built yet). 4199 emitKmpRoutineEntryT(KmpInt32Ty); 4200 // Build type kmp_task_t (if not built yet). 
4201 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4202 if (SavedKmpTaskloopTQTy.isNull()) { 4203 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4204 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4205 } 4206 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4207 } else { 4208 assert((D.getDirectiveKind() == OMPD_task || 4209 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4210 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4211 "Expected taskloop, task or target directive"); 4212 if (SavedKmpTaskTQTy.isNull()) { 4213 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4214 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4215 } 4216 KmpTaskTQTy = SavedKmpTaskTQTy; 4217 } 4218 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4219 // Build particular struct kmp_task_t for the given task. 4220 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4221 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4222 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4223 QualType KmpTaskTWithPrivatesPtrQTy = 4224 C.getPointerType(KmpTaskTWithPrivatesQTy); 4225 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4226 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4227 KmpTaskTWithPrivatesTy->getPointerTo(); 4228 llvm::Value *KmpTaskTWithPrivatesTySize = 4229 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4230 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4231 4232 // Emit initial values for private copies (if any). 
4233 llvm::Value *TaskPrivatesMap = nullptr; 4234 llvm::Type *TaskPrivatesMapTy = 4235 std::next(TaskFunction->arg_begin(), 3)->getType(); 4236 if (!Privates.empty()) { 4237 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4238 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4239 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4240 FI->getType(), Privates); 4241 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4242 TaskPrivatesMap, TaskPrivatesMapTy); 4243 } else { 4244 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4245 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4246 } 4247 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4248 // kmp_task_t *tt); 4249 llvm::Function *TaskEntry = emitProxyTaskFunction( 4250 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4251 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4252 TaskPrivatesMap); 4253 4254 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4255 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4256 // kmp_routine_entry_t *task_entry); 4257 // Task flags. Format is taken from 4258 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4259 // description of kmp_tasking_flags struct. 4260 enum { 4261 TiedFlag = 0x1, 4262 FinalFlag = 0x2, 4263 DestructorsFlag = 0x8, 4264 PriorityFlag = 0x20, 4265 DetachableFlag = 0x40, 4266 }; 4267 unsigned Flags = Data.Tied ? TiedFlag : 0; 4268 bool NeedsCleanup = false; 4269 if (!Privates.empty()) { 4270 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4271 if (NeedsCleanup) 4272 Flags = Flags | DestructorsFlag; 4273 } 4274 if (Data.Priority.getInt()) 4275 Flags = Flags | PriorityFlag; 4276 if (D.hasClausesOfKind<OMPDetachClause>()) 4277 Flags = Flags | DetachableFlag; 4278 llvm::Value *TaskFlags = 4279 Data.Final.getPointer() 4280 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4281 CGF.Builder.getInt32(FinalFlag), 4282 CGF.Builder.getInt32(/*C=*/0)) 4283 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4284 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4285 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4286 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4287 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4288 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4289 TaskEntry, KmpRoutineEntryPtrTy)}; 4290 llvm::Value *NewTask; 4291 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4292 // Check if we have any device clause associated with the directive. 4293 const Expr *Device = nullptr; 4294 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4295 Device = C->getDevice(); 4296 // Emit device ID if any otherwise use default value. 4297 llvm::Value *DeviceID; 4298 if (Device) 4299 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4300 CGF.Int64Ty, /*isSigned=*/true); 4301 else 4302 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4303 AllocArgs.push_back(DeviceID); 4304 NewTask = CGF.EmitRuntimeCall( 4305 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4306 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4307 AllocArgs); 4308 } else { 4309 NewTask = 4310 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4311 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4312 AllocArgs); 4313 } 4314 // Emit detach clause initialization. 
4315 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4316 // task_descriptor); 4317 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4318 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4319 LValue EvtLVal = CGF.EmitLValue(Evt); 4320 4321 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4322 // int gtid, kmp_task_t *task); 4323 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4324 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4325 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4326 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4327 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4328 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4329 {Loc, Tid, NewTask}); 4330 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4331 Evt->getExprLoc()); 4332 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4333 } 4334 // Process affinity clauses. 4335 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4336 // Process list of affinity data. 4337 ASTContext &C = CGM.getContext(); 4338 Address AffinitiesArray = Address::invalid(); 4339 // Calculate number of elements to form the array of affinity data. 4340 llvm::Value *NumOfElements = nullptr; 4341 unsigned NumAffinities = 0; 4342 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4343 if (const Expr *Modifier = C->getModifier()) { 4344 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4345 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4346 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4347 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4348 NumOfElements = 4349 NumOfElements ? 
CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4350 } 4351 } else { 4352 NumAffinities += C->varlist_size(); 4353 } 4354 } 4355 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4356 // Fields ids in kmp_task_affinity_info record. 4357 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4358 4359 QualType KmpTaskAffinityInfoArrayTy; 4360 if (NumOfElements) { 4361 NumOfElements = CGF.Builder.CreateNUWAdd( 4362 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4363 OpaqueValueExpr OVE( 4364 Loc, 4365 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4366 VK_RValue); 4367 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4368 RValue::get(NumOfElements)); 4369 KmpTaskAffinityInfoArrayTy = 4370 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4371 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4372 // Properly emit variable-sized array. 4373 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4374 ImplicitParamDecl::Other); 4375 CGF.EmitVarDecl(*PD); 4376 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4377 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4378 /*isSigned=*/false); 4379 } else { 4380 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4381 KmpTaskAffinityInfoTy, 4382 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4383 ArrayType::Normal, /*IndexTypeQuals=*/0); 4384 AffinitiesArray = 4385 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4386 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4387 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4388 /*isSigned=*/false); 4389 } 4390 4391 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4392 // Fill array by elements without iterators. 
4393 unsigned Pos = 0; 4394 bool HasIterator = false; 4395 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4396 if (C->getModifier()) { 4397 HasIterator = true; 4398 continue; 4399 } 4400 for (const Expr *E : C->varlists()) { 4401 llvm::Value *Addr; 4402 llvm::Value *Size; 4403 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4404 LValue Base = 4405 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4406 KmpTaskAffinityInfoTy); 4407 // affs[i].base_addr = &<Affinities[i].second>; 4408 LValue BaseAddrLVal = CGF.EmitLValueForField( 4409 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4410 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4411 BaseAddrLVal); 4412 // affs[i].len = sizeof(<Affinities[i].second>); 4413 LValue LenLVal = CGF.EmitLValueForField( 4414 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4415 CGF.EmitStoreOfScalar(Size, LenLVal); 4416 ++Pos; 4417 } 4418 } 4419 LValue PosLVal; 4420 if (HasIterator) { 4421 PosLVal = CGF.MakeAddrLValue( 4422 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4423 C.getSizeType()); 4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4425 } 4426 // Process elements with iterators. 
4427 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4428 const Expr *Modifier = C->getModifier(); 4429 if (!Modifier) 4430 continue; 4431 OMPIteratorGeneratorScope IteratorScope( 4432 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4433 for (const Expr *E : C->varlists()) { 4434 llvm::Value *Addr; 4435 llvm::Value *Size; 4436 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4437 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4438 LValue Base = CGF.MakeAddrLValue( 4439 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4440 AffinitiesArray.getAlignment()), 4441 KmpTaskAffinityInfoTy); 4442 // affs[i].base_addr = &<Affinities[i].second>; 4443 LValue BaseAddrLVal = CGF.EmitLValueForField( 4444 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4445 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4446 BaseAddrLVal); 4447 // affs[i].len = sizeof(<Affinities[i].second>); 4448 LValue LenLVal = CGF.EmitLValueForField( 4449 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4450 CGF.EmitStoreOfScalar(Size, LenLVal); 4451 Idx = CGF.Builder.CreateNUWAdd( 4452 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4453 CGF.EmitStoreOfScalar(Idx, PosLVal); 4454 } 4455 } 4456 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4457 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4458 // naffins, kmp_task_affinity_info_t *affin_list); 4459 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4460 llvm::Value *GTid = getThreadID(CGF, Loc); 4461 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4462 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4463 // FIXME: Emit the function and ignore its result for now unless the 4464 // runtime function is properly implemented. 
4465 (void)CGF.EmitRuntimeCall( 4466 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4467 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4468 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4469 } 4470 llvm::Value *NewTaskNewTaskTTy = 4471 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4472 NewTask, KmpTaskTWithPrivatesPtrTy); 4473 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4474 KmpTaskTWithPrivatesQTy); 4475 LValue TDBase = 4476 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4477 // Fill the data in the resulting kmp_task_t record. 4478 // Copy shareds if there are any. 4479 Address KmpTaskSharedsPtr = Address::invalid(); 4480 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4481 KmpTaskSharedsPtr = 4482 Address(CGF.EmitLoadOfScalar( 4483 CGF.EmitLValueForField( 4484 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4485 KmpTaskTShareds)), 4486 Loc), 4487 CGM.getNaturalTypeAlignment(SharedsTy)); 4488 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4489 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4490 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4491 } 4492 // Emit initial values for private copies (if any). 4493 TaskResultTy Result; 4494 if (!Privates.empty()) { 4495 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4496 SharedsTy, SharedsPtrTy, Data, Privates, 4497 /*ForDup=*/false); 4498 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4499 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4500 Result.TaskDupFn = emitTaskDupFunction( 4501 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4502 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4503 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4504 } 4505 } 4506 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4507 enum { Priority = 0, Destructors = 1 }; 4508 // Provide pointer to function with destructors for privates. 4509 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4510 const RecordDecl *KmpCmplrdataUD = 4511 (*FI)->getType()->getAsUnionType()->getDecl(); 4512 if (NeedsCleanup) { 4513 llvm::Value *DestructorFn = emitDestructorsFunction( 4514 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4515 KmpTaskTWithPrivatesQTy); 4516 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4517 LValue DestructorsLV = CGF.EmitLValueForField( 4518 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4519 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4520 DestructorFn, KmpRoutineEntryPtrTy), 4521 DestructorsLV); 4522 } 4523 // Set priority. 4524 if (Data.Priority.getInt()) { 4525 LValue Data2LV = CGF.EmitLValueForField( 4526 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4527 LValue PriorityLV = CGF.EmitLValueForField( 4528 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4529 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4530 } 4531 Result.NewTask = NewTask; 4532 Result.TaskEntry = TaskEntry; 4533 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4534 Result.TDBase = TDBase; 4535 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4536 return Result; 4537 } 4538 4539 namespace { 4540 /// Dependence kind for RTL. 4541 enum RTLDependenceKindTy { 4542 DepIn = 0x01, 4543 DepInOut = 0x3, 4544 DepMutexInOutSet = 0x4 4545 }; 4546 /// Fields ids in kmp_depend_info record. 4547 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4548 } // namespace 4549 4550 /// Translates internal dependency kind into the runtime kind. 4551 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4552 RTLDependenceKindTy DepKind; 4553 switch (K) { 4554 case OMPC_DEPEND_in: 4555 DepKind = DepIn; 4556 break; 4557 // Out and InOut dependencies must use the same code. 
4558 case OMPC_DEPEND_out: 4559 case OMPC_DEPEND_inout: 4560 DepKind = DepInOut; 4561 break; 4562 case OMPC_DEPEND_mutexinoutset: 4563 DepKind = DepMutexInOutSet; 4564 break; 4565 case OMPC_DEPEND_source: 4566 case OMPC_DEPEND_sink: 4567 case OMPC_DEPEND_depobj: 4568 case OMPC_DEPEND_unknown: 4569 llvm_unreachable("Unknown task dependence type"); 4570 } 4571 return DepKind; 4572 } 4573 4574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4575 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4576 QualType &FlagsTy) { 4577 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4578 if (KmpDependInfoTy.isNull()) { 4579 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4580 KmpDependInfoRD->startDefinition(); 4581 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4582 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4583 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4584 KmpDependInfoRD->completeDefinition(); 4585 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4586 } 4587 } 4588 4589 std::pair<llvm::Value *, LValue> 4590 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4591 SourceLocation Loc) { 4592 ASTContext &C = CGM.getContext(); 4593 QualType FlagsTy; 4594 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4595 RecordDecl *KmpDependInfoRD = 4596 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4597 LValue Base = CGF.EmitLoadOfPointerLValue( 4598 DepobjLVal.getAddress(CGF), 4599 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4600 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4601 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4602 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4603 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4604 Base.getTBAAInfo()); 4605 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4606 Addr.getPointer(), 4607 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4608 LValue NumDepsBase = CGF.MakeAddrLValue( 4609 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4610 Base.getBaseInfo(), Base.getTBAAInfo()); 4611 // NumDeps = deps[i].base_addr; 4612 LValue BaseAddrLVal = CGF.EmitLValueForField( 4613 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4614 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4615 return std::make_pair(NumDeps, Base); 4616 } 4617 4618 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4619 llvm::PointerUnion<unsigned *, LValue *> Pos, 4620 const OMPTaskDataTy::DependData &Data, 4621 Address DependenciesArray) { 4622 CodeGenModule &CGM = CGF.CGM; 4623 ASTContext &C = CGM.getContext(); 4624 QualType FlagsTy; 4625 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4626 RecordDecl *KmpDependInfoRD = 4627 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4628 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4629 4630 OMPIteratorGeneratorScope IteratorScope( 4631 CGF, cast_or_null<OMPIteratorExpr>( 4632 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4633 : nullptr)); 4634 for (const Expr *E : Data.DepExprs) { 4635 llvm::Value *Addr; 4636 llvm::Value *Size; 4637 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4638 LValue Base; 4639 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4640 Base = CGF.MakeAddrLValue( 4641 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4642 } else { 4643 LValue &PosLVal = *Pos.get<LValue *>(); 4644 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4645 Base = CGF.MakeAddrLValue( 4646 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4647 DependenciesArray.getAlignment()), 4648 KmpDependInfoTy); 4649 } 4650 // deps[i].base_addr = &<Dependencies[i].second>; 4651 LValue BaseAddrLVal = CGF.EmitLValueForField( 4652 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4653 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4654 BaseAddrLVal); 4655 // deps[i].len = sizeof(<Dependencies[i].second>); 4656 LValue LenLVal = CGF.EmitLValueForField( 4657 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4658 CGF.EmitStoreOfScalar(Size, LenLVal); 4659 // deps[i].flags = <Dependencies[i].first>; 4660 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4661 LValue FlagsLVal = CGF.EmitLValueForField( 4662 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4663 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4664 FlagsLVal); 4665 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4666 ++(*P); 4667 } else { 4668 LValue &PosLVal = *Pos.get<LValue *>(); 4669 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4670 Idx = CGF.Builder.CreateNUWAdd(Idx, 4671 llvm::ConstantInt::get(Idx->getType(), 1)); 4672 CGF.EmitStoreOfScalar(Idx, PosLVal); 4673 } 4674 } 4675 } 4676 4677 static SmallVector<llvm::Value *, 4> 4678 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4679 const OMPTaskDataTy::DependData 
&Data) { 4680 assert(Data.DepKind == OMPC_DEPEND_depobj && 4681 "Expected depobj dependecy kind."); 4682 SmallVector<llvm::Value *, 4> Sizes; 4683 SmallVector<LValue, 4> SizeLVals; 4684 ASTContext &C = CGF.getContext(); 4685 QualType FlagsTy; 4686 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4687 RecordDecl *KmpDependInfoRD = 4688 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4689 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4690 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4691 { 4692 OMPIteratorGeneratorScope IteratorScope( 4693 CGF, cast_or_null<OMPIteratorExpr>( 4694 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4695 : nullptr)); 4696 for (const Expr *E : Data.DepExprs) { 4697 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4698 LValue Base = CGF.EmitLoadOfPointerLValue( 4699 DepobjLVal.getAddress(CGF), 4700 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4701 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4702 Base.getAddress(CGF), KmpDependInfoPtrT); 4703 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4704 Base.getTBAAInfo()); 4705 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4706 Addr.getPointer(), 4707 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4708 LValue NumDepsBase = CGF.MakeAddrLValue( 4709 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4710 Base.getBaseInfo(), Base.getTBAAInfo()); 4711 // NumDeps = deps[i].base_addr; 4712 LValue BaseAddrLVal = CGF.EmitLValueForField( 4713 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4714 llvm::Value *NumDeps = 4715 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4716 LValue NumLVal = CGF.MakeAddrLValue( 4717 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4718 C.getUIntPtrType()); 4719 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4720 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4721 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4722 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4723 CGF.EmitStoreOfScalar(Add, NumLVal); 4724 SizeLVals.push_back(NumLVal); 4725 } 4726 } 4727 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4728 llvm::Value *Size = 4729 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4730 Sizes.push_back(Size); 4731 } 4732 return Sizes; 4733 } 4734 4735 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4736 LValue PosLVal, 4737 const OMPTaskDataTy::DependData &Data, 4738 Address DependenciesArray) { 4739 assert(Data.DepKind == OMPC_DEPEND_depobj && 4740 "Expected depobj dependecy kind."); 4741 ASTContext &C = CGF.getContext(); 4742 QualType FlagsTy; 4743 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4744 RecordDecl *KmpDependInfoRD = 4745 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4746 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4747 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4748 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4749 { 4750 OMPIteratorGeneratorScope IteratorScope( 4751 CGF, cast_or_null<OMPIteratorExpr>( 4752 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4753 : nullptr)); 4754 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4755 const Expr *E = Data.DepExprs[I]; 4756 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4757 LValue Base = CGF.EmitLoadOfPointerLValue( 4758 DepobjLVal.getAddress(CGF), 4759 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4760 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4761 Base.getAddress(CGF), KmpDependInfoPtrT); 4762 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4763 Base.getTBAAInfo()); 4764 4765 // Get number of elements in a single depobj. 
4766 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4767 Addr.getPointer(), 4768 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4769 LValue NumDepsBase = CGF.MakeAddrLValue( 4770 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4771 Base.getBaseInfo(), Base.getTBAAInfo()); 4772 // NumDeps = deps[i].base_addr; 4773 LValue BaseAddrLVal = CGF.EmitLValueForField( 4774 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4775 llvm::Value *NumDeps = 4776 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4777 4778 // memcopy dependency data. 4779 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4780 ElSize, 4781 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4782 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4783 Address DepAddr = 4784 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4785 DependenciesArray.getAlignment()); 4786 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4787 4788 // Increase pos. 4789 // pos += size; 4790 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4791 CGF.EmitStoreOfScalar(Add, PosLVal); 4792 } 4793 } 4794 } 4795 4796 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4797 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4798 SourceLocation Loc) { 4799 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4800 return D.DepExprs.empty(); 4801 })) 4802 return std::make_pair(nullptr, Address::invalid()); 4803 // Process list of dependencies. 4804 ASTContext &C = CGM.getContext(); 4805 Address DependenciesArray = Address::invalid(); 4806 llvm::Value *NumOfElements = nullptr; 4807 unsigned NumDependencies = std::accumulate( 4808 Dependencies.begin(), Dependencies.end(), 0, 4809 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4810 return D.DepKind == OMPC_DEPEND_depobj 4811 ? V 4812 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4813 }); 4814 QualType FlagsTy; 4815 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4816 bool HasDepobjDeps = false; 4817 bool HasRegularWithIterators = false; 4818 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4819 llvm::Value *NumOfRegularWithIterators = 4820 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4821 // Calculate number of depobj dependecies and regular deps with the iterators. 4822 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4823 if (D.DepKind == OMPC_DEPEND_depobj) { 4824 SmallVector<llvm::Value *, 4> Sizes = 4825 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4826 for (llvm::Value *Size : Sizes) { 4827 NumOfDepobjElements = 4828 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4829 } 4830 HasDepobjDeps = true; 4831 continue; 4832 } 4833 // Include number of iterations, if any. 4834 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4835 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4836 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4837 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4838 NumOfRegularWithIterators = 4839 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4840 } 4841 HasRegularWithIterators = true; 4842 continue; 4843 } 4844 } 4845 4846 QualType KmpDependInfoArrayTy; 4847 if (HasDepobjDeps || HasRegularWithIterators) { 4848 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4849 /*isSigned=*/false); 4850 if (HasDepobjDeps) { 4851 NumOfElements = 4852 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4853 } 4854 if (HasRegularWithIterators) { 4855 NumOfElements = 4856 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4857 } 4858 OpaqueValueExpr OVE(Loc, 4859 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4860 VK_RValue); 4861 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4862 RValue::get(NumOfElements)); 4863 
KmpDependInfoArrayTy = 4864 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4865 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4866 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4867 // Properly emit variable-sized array. 4868 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4869 ImplicitParamDecl::Other); 4870 CGF.EmitVarDecl(*PD); 4871 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4872 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4873 /*isSigned=*/false); 4874 } else { 4875 KmpDependInfoArrayTy = C.getConstantArrayType( 4876 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4877 ArrayType::Normal, /*IndexTypeQuals=*/0); 4878 DependenciesArray = 4879 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4880 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4881 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4882 /*isSigned=*/false); 4883 } 4884 unsigned Pos = 0; 4885 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4886 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4887 Dependencies[I].IteratorExpr) 4888 continue; 4889 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4890 DependenciesArray); 4891 } 4892 // Copy regular dependecies with iterators. 4893 LValue PosLVal = CGF.MakeAddrLValue( 4894 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4895 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4896 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4897 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4898 !Dependencies[I].IteratorExpr) 4899 continue; 4900 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4901 DependenciesArray); 4902 } 4903 // Copy final depobj arrays without iterators. 
4904 if (HasDepobjDeps) { 4905 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4906 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4907 continue; 4908 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4909 DependenciesArray); 4910 } 4911 } 4912 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4913 DependenciesArray, CGF.VoidPtrTy); 4914 return std::make_pair(NumOfElements, DependenciesArray); 4915 } 4916 4917 Address CGOpenMPRuntime::emitDepobjDependClause( 4918 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4919 SourceLocation Loc) { 4920 if (Dependencies.DepExprs.empty()) 4921 return Address::invalid(); 4922 // Process list of dependencies. 4923 ASTContext &C = CGM.getContext(); 4924 Address DependenciesArray = Address::invalid(); 4925 unsigned NumDependencies = Dependencies.DepExprs.size(); 4926 QualType FlagsTy; 4927 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4928 RecordDecl *KmpDependInfoRD = 4929 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4930 4931 llvm::Value *Size; 4932 // Define type kmp_depend_info[<Dependencies.size()>]; 4933 // For depobj reserve one extra element to store the number of elements. 4934 // It is required to handle depobj(x) update(in) construct. 
4935 // kmp_depend_info[<Dependencies.size()>] deps; 4936 llvm::Value *NumDepsVal; 4937 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4938 if (const auto *IE = 4939 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4940 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4941 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4942 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4943 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4944 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4945 } 4946 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4947 NumDepsVal); 4948 CharUnits SizeInBytes = 4949 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4950 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4951 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4952 NumDepsVal = 4953 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4954 } else { 4955 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4956 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4957 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4958 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4959 Size = CGM.getSize(Sz.alignTo(Align)); 4960 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4961 } 4962 // Need to allocate on the dynamic memory. 4963 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4964 // Use default allocator. 
4965 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4966 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4967 4968 llvm::Value *Addr = 4969 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4970 CGM.getModule(), OMPRTL___kmpc_alloc), 4971 Args, ".dep.arr.addr"); 4972 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4973 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4974 DependenciesArray = Address(Addr, Align); 4975 // Write number of elements in the first element of array for depobj. 4976 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4977 // deps[i].base_addr = NumDependencies; 4978 LValue BaseAddrLVal = CGF.EmitLValueForField( 4979 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4980 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4981 llvm::PointerUnion<unsigned *, LValue *> Pos; 4982 unsigned Idx = 1; 4983 LValue PosLVal; 4984 if (Dependencies.IteratorExpr) { 4985 PosLVal = CGF.MakeAddrLValue( 4986 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4987 C.getSizeType()); 4988 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4989 /*IsInit=*/true); 4990 Pos = &PosLVal; 4991 } else { 4992 Pos = &Idx; 4993 } 4994 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4995 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4996 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4997 return DependenciesArray; 4998 } 4999 5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5001 SourceLocation Loc) { 5002 ASTContext &C = CGM.getContext(); 5003 QualType FlagsTy; 5004 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5005 LValue Base = CGF.EmitLoadOfPointerLValue( 5006 DepobjLVal.getAddress(CGF), 5007 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5008 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5009 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5010 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5011 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5012 Addr.getPointer(), 5013 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5014 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5015 CGF.VoidPtrTy); 5016 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5017 // Use default allocator. 5018 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5019 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5020 5021 // _kmpc_free(gtid, addr, nullptr); 5022 (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5023 CGM.getModule(), OMPRTL___kmpc_free), 5024 Args); 5025 } 5026 5027 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5028 OpenMPDependClauseKind NewDepKind, 5029 SourceLocation Loc) { 5030 ASTContext &C = CGM.getContext(); 5031 QualType FlagsTy; 5032 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5033 RecordDecl *KmpDependInfoRD = 5034 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5035 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5036 llvm::Value *NumDeps; 5037 LValue Base; 5038 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5039 5040 Address Begin = Base.getAddress(CGF); 5041 // Cast from pointer to array type to pointer to single element. 5042 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5043 // The basic structure here is a while-do loop. 
5044 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5045 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5046 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5047 CGF.EmitBlock(BodyBB); 5048 llvm::PHINode *ElementPHI = 5049 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5050 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5051 Begin = Address(ElementPHI, Begin.getAlignment()); 5052 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5053 Base.getTBAAInfo()); 5054 // deps[i].flags = NewDepKind; 5055 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5056 LValue FlagsLVal = CGF.EmitLValueForField( 5057 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5058 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5059 FlagsLVal); 5060 5061 // Shift the address forward by one element. 5062 Address ElementNext = 5063 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5064 ElementPHI->addIncoming(ElementNext.getPointer(), 5065 CGF.Builder.GetInsertBlock()); 5066 llvm::Value *IsEmpty = 5067 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5068 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5069 // Done. 
5070 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5071 } 5072 5073 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5074 const OMPExecutableDirective &D, 5075 llvm::Function *TaskFunction, 5076 QualType SharedsTy, Address Shareds, 5077 const Expr *IfCond, 5078 const OMPTaskDataTy &Data) { 5079 if (!CGF.HaveInsertPoint()) 5080 return; 5081 5082 TaskResultTy Result = 5083 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5084 llvm::Value *NewTask = Result.NewTask; 5085 llvm::Function *TaskEntry = Result.TaskEntry; 5086 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5087 LValue TDBase = Result.TDBase; 5088 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5089 // Process list of dependences. 5090 Address DependenciesArray = Address::invalid(); 5091 llvm::Value *NumOfElements; 5092 std::tie(NumOfElements, DependenciesArray) = 5093 emitDependClause(CGF, Data.Dependences, Loc); 5094 5095 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5096 // libcall. 
5097 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5098 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5099 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5100 // list is not empty 5101 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5102 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5103 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5104 llvm::Value *DepTaskArgs[7]; 5105 if (!Data.Dependences.empty()) { 5106 DepTaskArgs[0] = UpLoc; 5107 DepTaskArgs[1] = ThreadID; 5108 DepTaskArgs[2] = NewTask; 5109 DepTaskArgs[3] = NumOfElements; 5110 DepTaskArgs[4] = DependenciesArray.getPointer(); 5111 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5112 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5113 } 5114 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5115 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5116 if (!Data.Tied) { 5117 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5118 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5119 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5120 } 5121 if (!Data.Dependences.empty()) { 5122 CGF.EmitRuntimeCall( 5123 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5124 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5125 DepTaskArgs); 5126 } else { 5127 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5128 CGM.getModule(), OMPRTL___kmpc_omp_task), 5129 TaskArgs); 5130 } 5131 // Check if parent region is untied and build return for untied task; 5132 if (auto *Region = 5133 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5134 Region->emitUntiedSwitch(CGF); 5135 }; 5136 5137 llvm::Value *DepWaitTaskArgs[6]; 5138 if (!Data.Dependences.empty()) { 5139 DepWaitTaskArgs[0] = UpLoc; 5140 DepWaitTaskArgs[1] = ThreadID; 5141 DepWaitTaskArgs[2] = NumOfElements; 5142 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 
5143 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5144 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5145 } 5146 auto &M = CGM.getModule(); 5147 auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5148 &Data, &DepWaitTaskArgs, 5149 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5150 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5151 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5152 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5153 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5154 // is specified. 5155 if (!Data.Dependences.empty()) 5156 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5157 M, OMPRTL___kmpc_omp_wait_deps), 5158 DepWaitTaskArgs); 5159 // Call proxy_task_entry(gtid, new_task); 5160 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5161 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5162 Action.Enter(CGF); 5163 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5164 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5165 OutlinedFnArgs); 5166 }; 5167 5168 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5169 // kmp_task_t *new_task); 5170 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5171 // kmp_task_t *new_task); 5172 RegionCodeGenTy RCG(CodeGen); 5173 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5174 M, OMPRTL___kmpc_omp_task_begin_if0), 5175 TaskArgs, 5176 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5177 M, OMPRTL___kmpc_omp_task_complete_if0), 5178 TaskArgs); 5179 RCG.setAction(Action); 5180 RCG(CGF); 5181 }; 5182 5183 if (IfCond) { 5184 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5185 } else { 5186 RegionCodeGenTy ThenRCG(ThenCodeGen); 5187 ThenRCG(CGF); 5188 } 5189 } 5190 5191 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5192 const 
OMPLoopDirective &D, 5193 llvm::Function *TaskFunction, 5194 QualType SharedsTy, Address Shareds, 5195 const Expr *IfCond, 5196 const OMPTaskDataTy &Data) { 5197 if (!CGF.HaveInsertPoint()) 5198 return; 5199 TaskResultTy Result = 5200 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5201 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5202 // libcall. 5203 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5204 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5205 // sched, kmp_uint64 grainsize, void *task_dup); 5206 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5207 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5208 llvm::Value *IfVal; 5209 if (IfCond) { 5210 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5211 /*isSigned=*/true); 5212 } else { 5213 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5214 } 5215 5216 LValue LBLVal = CGF.EmitLValueForField( 5217 Result.TDBase, 5218 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5219 const auto *LBVar = 5220 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5221 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5222 LBLVal.getQuals(), 5223 /*IsInitializer=*/true); 5224 LValue UBLVal = CGF.EmitLValueForField( 5225 Result.TDBase, 5226 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5227 const auto *UBVar = 5228 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5229 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5230 UBLVal.getQuals(), 5231 /*IsInitializer=*/true); 5232 LValue StLVal = CGF.EmitLValueForField( 5233 Result.TDBase, 5234 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5235 const auto *StVar = 5236 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5237 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 
5238 StLVal.getQuals(), 5239 /*IsInitializer=*/true); 5240 // Store reductions address. 5241 LValue RedLVal = CGF.EmitLValueForField( 5242 Result.TDBase, 5243 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5244 if (Data.Reductions) { 5245 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5246 } else { 5247 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5248 CGF.getContext().VoidPtrTy); 5249 } 5250 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5251 llvm::Value *TaskArgs[] = { 5252 UpLoc, 5253 ThreadID, 5254 Result.NewTask, 5255 IfVal, 5256 LBLVal.getPointer(CGF), 5257 UBLVal.getPointer(CGF), 5258 CGF.EmitLoadOfScalar(StLVal, Loc), 5259 llvm::ConstantInt::getSigned( 5260 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5261 llvm::ConstantInt::getSigned( 5262 CGF.IntTy, Data.Schedule.getPointer() 5263 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5264 : NoSchedule), 5265 Data.Schedule.getPointer() 5266 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5267 /*isSigned=*/false) 5268 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5269 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5270 Result.TaskDupFn, CGF.VoidPtrTy) 5271 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5272 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5273 CGM.getModule(), OMPRTL___kmpc_taskloop), 5274 TaskArgs); 5275 } 5276 5277 /// Emit reduction operation for each element of array (required for 5278 /// array sections) LHS op = RHS. 5279 /// \param Type Type of array. 5280 /// \param LHSVar Variable on the left side of the reduction operation 5281 /// (references element of array in original variable). 5282 /// \param RHSVar Variable on the right side of the reduction operation 5283 /// (references element of array in original variable). 5284 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5285 /// RHSVar. 
5286 static void EmitOMPAggregateReduction( 5287 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5288 const VarDecl *RHSVar, 5289 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5290 const Expr *, const Expr *)> &RedOpGen, 5291 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5292 const Expr *UpExpr = nullptr) { 5293 // Perform element-by-element initialization. 5294 QualType ElementTy; 5295 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5296 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5297 5298 // Drill down to the base element type on both arrays. 5299 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5300 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5301 5302 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5303 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5304 // Cast from pointer to array type to pointer to single element. 5305 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5306 // The basic structure here is a while-do loop. 5307 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5308 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5309 llvm::Value *IsEmpty = 5310 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5311 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5312 5313 // Enter the loop body, making that address the current address. 
5314 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5315 CGF.EmitBlock(BodyBB); 5316 5317 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5318 5319 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5320 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5321 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5322 Address RHSElementCurrent = 5323 Address(RHSElementPHI, 5324 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5325 5326 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5327 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5328 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5329 Address LHSElementCurrent = 5330 Address(LHSElementPHI, 5331 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5332 5333 // Emit copy. 5334 CodeGenFunction::OMPPrivateScope Scope(CGF); 5335 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5336 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5337 Scope.Privatize(); 5338 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5339 Scope.ForceCleanup(); 5340 5341 // Shift the address forward by one element. 5342 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5343 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5344 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5345 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5346 // Check whether we've reached the end. 5347 llvm::Value *Done = 5348 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5349 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5350 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5351 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5352 5353 // Done. 5354 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5355 } 5356 5357 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5359 /// UDR combiner function. 5360 static void emitReductionCombiner(CodeGenFunction &CGF, 5361 const Expr *ReductionOp) { 5362 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5363 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5364 if (const auto *DRE = 5365 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5366 if (const auto *DRD = 5367 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5368 std::pair<llvm::Function *, llvm::Function *> Reduction = 5369 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5370 RValue Func = RValue::get(Reduction.first); 5371 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5372 CGF.EmitIgnoredExpr(ReductionOp); 5373 return; 5374 } 5375 CGF.EmitIgnoredExpr(ReductionOp); 5376 } 5377 5378 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5379 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5380 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5381 ArrayRef<const Expr *> ReductionOps) { 5382 ASTContext &C = CGM.getContext(); 5383 5384 // void reduction_func(void *LHSArg, void *RHSArg); 5385 FunctionArgList Args; 5386 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5387 ImplicitParamDecl::Other); 5388 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5389 ImplicitParamDecl::Other); 5390 Args.push_back(&LHSArg); 5391 Args.push_back(&RHSArg); 5392 const auto &CGFI = 5393 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5394 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5396 llvm::GlobalValue::InternalLinkage, Name, 5397 &CGM.getModule()); 5398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5399 Fn->setDoesNotRecurse(); 
5400 CodeGenFunction CGF(CGM); 5401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5402 5403 // Dst = (void*[n])(LHSArg); 5404 // Src = (void*[n])(RHSArg); 5405 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5406 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5407 ArgsType), CGF.getPointerAlign()); 5408 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5410 ArgsType), CGF.getPointerAlign()); 5411 5412 // ... 5413 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5414 // ... 5415 CodeGenFunction::OMPPrivateScope Scope(CGF); 5416 auto IPriv = Privates.begin(); 5417 unsigned Idx = 0; 5418 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5419 const auto *RHSVar = 5420 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5421 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5422 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5423 }); 5424 const auto *LHSVar = 5425 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5426 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5427 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5428 }); 5429 QualType PrivTy = (*IPriv)->getType(); 5430 if (PrivTy->isVariablyModifiedType()) { 5431 // Get array size and emit VLA type. 
5432 ++Idx; 5433 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5434 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5435 const VariableArrayType *VLA = 5436 CGF.getContext().getAsVariableArrayType(PrivTy); 5437 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5438 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5439 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5440 CGF.EmitVariablyModifiedType(PrivTy); 5441 } 5442 } 5443 Scope.Privatize(); 5444 IPriv = Privates.begin(); 5445 auto ILHS = LHSExprs.begin(); 5446 auto IRHS = RHSExprs.begin(); 5447 for (const Expr *E : ReductionOps) { 5448 if ((*IPriv)->getType()->isArrayType()) { 5449 // Emit reduction for array section. 5450 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5451 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5452 EmitOMPAggregateReduction( 5453 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5454 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5455 emitReductionCombiner(CGF, E); 5456 }); 5457 } else { 5458 // Emit reduction for array subscript or single variable. 5459 emitReductionCombiner(CGF, E); 5460 } 5461 ++IPriv; 5462 ++ILHS; 5463 ++IRHS; 5464 } 5465 Scope.ForceCleanup(); 5466 CGF.FinishFunction(); 5467 return Fn; 5468 } 5469 5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5471 const Expr *ReductionOp, 5472 const Expr *PrivateRef, 5473 const DeclRefExpr *LHS, 5474 const DeclRefExpr *RHS) { 5475 if (PrivateRef->getType()->isArrayType()) { 5476 // Emit reduction for array section. 
5477 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5478 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5479 EmitOMPAggregateReduction( 5480 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5481 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5482 emitReductionCombiner(CGF, ReductionOp); 5483 }); 5484 } else { 5485 // Emit reduction for array subscript or single variable. 5486 emitReductionCombiner(CGF, ReductionOp); 5487 } 5488 } 5489 5490 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5491 ArrayRef<const Expr *> Privates, 5492 ArrayRef<const Expr *> LHSExprs, 5493 ArrayRef<const Expr *> RHSExprs, 5494 ArrayRef<const Expr *> ReductionOps, 5495 ReductionOptionsTy Options) { 5496 if (!CGF.HaveInsertPoint()) 5497 return; 5498 5499 bool WithNowait = Options.WithNowait; 5500 bool SimpleReduction = Options.SimpleReduction; 5501 5502 // Next code should be emitted for reduction: 5503 // 5504 // static kmp_critical_name lock = { 0 }; 5505 // 5506 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5507 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5508 // ... 5509 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5510 // *(Type<n>-1*)rhs[<n>-1]); 5511 // } 5512 // 5513 // ... 5514 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5515 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5516 // RedList, reduce_func, &<lock>)) { 5517 // case 1: 5518 // ... 5519 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5520 // ... 5521 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5522 // break; 5523 // case 2: 5524 // ... 5525 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5526 // ... 5527 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5528 // break; 5529 // default:; 5530 // } 5531 // 5532 // if SimpleReduction is true, only the next code is generated: 5533 // ... 
5534 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5535 // ... 5536 5537 ASTContext &C = CGM.getContext(); 5538 5539 if (SimpleReduction) { 5540 CodeGenFunction::RunCleanupsScope Scope(CGF); 5541 auto IPriv = Privates.begin(); 5542 auto ILHS = LHSExprs.begin(); 5543 auto IRHS = RHSExprs.begin(); 5544 for (const Expr *E : ReductionOps) { 5545 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5546 cast<DeclRefExpr>(*IRHS)); 5547 ++IPriv; 5548 ++ILHS; 5549 ++IRHS; 5550 } 5551 return; 5552 } 5553 5554 // 1. Build a list of reduction variables. 5555 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5556 auto Size = RHSExprs.size(); 5557 for (const Expr *E : Privates) { 5558 if (E->getType()->isVariablyModifiedType()) 5559 // Reserve place for array size. 5560 ++Size; 5561 } 5562 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5563 QualType ReductionArrayTy = 5564 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5565 /*IndexTypeQuals=*/0); 5566 Address ReductionList = 5567 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5568 auto IPriv = Privates.begin(); 5569 unsigned Idx = 0; 5570 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5571 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5572 CGF.Builder.CreateStore( 5573 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5574 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5575 Elem); 5576 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5577 // Store array size. 5578 ++Idx; 5579 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5580 llvm::Value *Size = CGF.Builder.CreateIntCast( 5581 CGF.getVLASize( 5582 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5583 .NumElts, 5584 CGF.SizeTy, /*isSigned=*/false); 5585 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5586 Elem); 5587 } 5588 } 5589 5590 // 2. 
Emit reduce_func(). 5591 llvm::Function *ReductionFn = emitReductionFunction( 5592 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5593 LHSExprs, RHSExprs, ReductionOps); 5594 5595 // 3. Create static kmp_critical_name lock = { 0 }; 5596 std::string Name = getName({"reduction"}); 5597 llvm::Value *Lock = getCriticalRegionLock(Name); 5598 5599 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5600 // RedList, reduce_func, &<lock>); 5601 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5602 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5603 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5604 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5605 ReductionList.getPointer(), CGF.VoidPtrTy); 5606 llvm::Value *Args[] = { 5607 IdentTLoc, // ident_t *<loc> 5608 ThreadId, // i32 <gtid> 5609 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5610 ReductionArrayTySize, // size_type sizeof(RedList) 5611 RL, // void *RedList 5612 ReductionFn, // void (*) (void *, void *) <reduce_func> 5613 Lock // kmp_critical_name *&<lock> 5614 }; 5615 llvm::Value *Res = CGF.EmitRuntimeCall( 5616 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5617 CGM.getModule(), 5618 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5619 Args); 5620 5621 // 5. Build switch(res) 5622 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5623 llvm::SwitchInst *SwInst = 5624 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5625 5626 // 6. Build case 1: 5627 // ... 5628 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5629 // ... 
5630 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5631 // break; 5632 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5633 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5634 CGF.EmitBlock(Case1BB); 5635 5636 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5637 llvm::Value *EndArgs[] = { 5638 IdentTLoc, // ident_t *<loc> 5639 ThreadId, // i32 <gtid> 5640 Lock // kmp_critical_name *&<lock> 5641 }; 5642 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5643 CodeGenFunction &CGF, PrePostActionTy &Action) { 5644 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5645 auto IPriv = Privates.begin(); 5646 auto ILHS = LHSExprs.begin(); 5647 auto IRHS = RHSExprs.begin(); 5648 for (const Expr *E : ReductionOps) { 5649 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5650 cast<DeclRefExpr>(*IRHS)); 5651 ++IPriv; 5652 ++ILHS; 5653 ++IRHS; 5654 } 5655 }; 5656 RegionCodeGenTy RCG(CodeGen); 5657 CommonActionTy Action( 5658 nullptr, llvm::None, 5659 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5660 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5661 : OMPRTL___kmpc_end_reduce), 5662 EndArgs); 5663 RCG.setAction(Action); 5664 RCG(CGF); 5665 5666 CGF.EmitBranch(DefaultBB); 5667 5668 // 7. Build case 2: 5669 // ... 5670 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5671 // ... 
5672 // break; 5673 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5674 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5675 CGF.EmitBlock(Case2BB); 5676 5677 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5678 CodeGenFunction &CGF, PrePostActionTy &Action) { 5679 auto ILHS = LHSExprs.begin(); 5680 auto IRHS = RHSExprs.begin(); 5681 auto IPriv = Privates.begin(); 5682 for (const Expr *E : ReductionOps) { 5683 const Expr *XExpr = nullptr; 5684 const Expr *EExpr = nullptr; 5685 const Expr *UpExpr = nullptr; 5686 BinaryOperatorKind BO = BO_Comma; 5687 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5688 if (BO->getOpcode() == BO_Assign) { 5689 XExpr = BO->getLHS(); 5690 UpExpr = BO->getRHS(); 5691 } 5692 } 5693 // Try to emit update expression as a simple atomic. 5694 const Expr *RHSExpr = UpExpr; 5695 if (RHSExpr) { 5696 // Analyze RHS part of the whole expression. 5697 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5698 RHSExpr->IgnoreParenImpCasts())) { 5699 // If this is a conditional operator, analyze its condition for 5700 // min/max reduction operator. 
5701 RHSExpr = ACO->getCond(); 5702 } 5703 if (const auto *BORHS = 5704 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5705 EExpr = BORHS->getRHS(); 5706 BO = BORHS->getOpcode(); 5707 } 5708 } 5709 if (XExpr) { 5710 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5711 auto &&AtomicRedGen = [BO, VD, 5712 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5713 const Expr *EExpr, const Expr *UpExpr) { 5714 LValue X = CGF.EmitLValue(XExpr); 5715 RValue E; 5716 if (EExpr) 5717 E = CGF.EmitAnyExpr(EExpr); 5718 CGF.EmitOMPAtomicSimpleUpdateExpr( 5719 X, E, BO, /*IsXLHSInRHSPart=*/true, 5720 llvm::AtomicOrdering::Monotonic, Loc, 5721 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5722 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5723 PrivateScope.addPrivate( 5724 VD, [&CGF, VD, XRValue, Loc]() { 5725 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5726 CGF.emitOMPSimpleStore( 5727 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5728 VD->getType().getNonReferenceType(), Loc); 5729 return LHSTemp; 5730 }); 5731 (void)PrivateScope.Privatize(); 5732 return CGF.EmitAnyExpr(UpExpr); 5733 }); 5734 }; 5735 if ((*IPriv)->getType()->isArrayType()) { 5736 // Emit atomic reduction for array section. 5737 const auto *RHSVar = 5738 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5739 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5740 AtomicRedGen, XExpr, EExpr, UpExpr); 5741 } else { 5742 // Emit atomic reduction for array subscript or single variable. 5743 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5744 } 5745 } else { 5746 // Emit as a critical region. 
5747 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5748 const Expr *, const Expr *) { 5749 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5750 std::string Name = RT.getName({"atomic_reduction"}); 5751 RT.emitCriticalRegion( 5752 CGF, Name, 5753 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5754 Action.Enter(CGF); 5755 emitReductionCombiner(CGF, E); 5756 }, 5757 Loc); 5758 }; 5759 if ((*IPriv)->getType()->isArrayType()) { 5760 const auto *LHSVar = 5761 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5762 const auto *RHSVar = 5763 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5764 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5765 CritRedGen); 5766 } else { 5767 CritRedGen(CGF, nullptr, nullptr, nullptr); 5768 } 5769 } 5770 ++ILHS; 5771 ++IRHS; 5772 ++IPriv; 5773 } 5774 }; 5775 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5776 if (!WithNowait) { 5777 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5778 llvm::Value *EndArgs[] = { 5779 IdentTLoc, // ident_t *<loc> 5780 ThreadId, // i32 <gtid> 5781 Lock // kmp_critical_name *&<lock> 5782 }; 5783 CommonActionTy Action(nullptr, llvm::None, 5784 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5785 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5786 EndArgs); 5787 AtomicRCG.setAction(Action); 5788 AtomicRCG(CGF); 5789 } else { 5790 AtomicRCG(CGF); 5791 } 5792 5793 CGF.EmitBranch(DefaultBB); 5794 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5795 } 5796 5797 /// Generates unique name for artificial threadprivate variables. 5798 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// The declaration is the one returned by ::getBaseDecl(Ref, DE); when that
/// returns null, \p Ref itself is cast to a DeclRefExpr. The canonical
/// declaration is used. Local variables and parameters contribute their plain
/// name, all other declarations their mangled name; the name is passed through
/// CGOpenMPRuntime::getName before being appended to \p Prefix.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // The raw encoding of the canonical declaration's begin location is the
  // trailing component of the name.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// The function is created with internal linkage and marked as non-recursive.
/// Both parameters are declared as restrict-qualified void pointers.
/// \param N Index of the reduction item inside \p RCG.
/// \return The emitted function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // PrivateAddr is the pointer received in the first parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise a null pointer lvalue is passed.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// The function is created with internal linkage and marked as non-recursive.
/// \p LHS and \p RHS must be DeclRefExprs to VarDecls (they are cast
/// unconditionally); inside the function these variables are remapped to the
/// pointers received in the first and second parameter respectively.
/// \param N Index of the reduction item inside \p RCG.
/// \return The emitted function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item inside \p RCG.
/// \return The emitted function (internal linkage, non-recursive), or nullptr
/// when RCG.needCleanups(N) is false and no function is created.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // PrivateAddr is the pointer received in the only parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits an array with one kmp_taskred_input_t descriptor per entry of
/// Data.ReductionVars and the runtime call that receives it.
///
/// \p LHSExprs, \p RHSExprs, Data.ReductionOps and Data.ReductionCopies are
/// indexed with the same counter as Data.ReductionVars.
/// \return The result of __kmpc_taskred_modifier_init when
/// Data.IsReductionWithTaskMod is set, otherwise the result of
/// __kmpc_taskred_init; nullptr when \p CGF has no insert point or there are
/// no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when no finalizer was emitted)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the call to __kmpc_task_reduction_modifier_fini; its result is
/// discarded.
/// NOTE(review): unlike emitTaskReductionInit, there is no HaveInsertPoint()
/// check here - confirm callers guarantee an insert point.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int
  // is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the run-time size of reduction item \p N into the artificial
/// threadprivate variable named by generateUniqueName(CGM, "reduction_size",
/// ...), the same variable the init/comb/fini helper functions load from.
/// Nothing is emitted when RCG.getSizes(N).second is null.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for \p SharedLVal.
/// \return An Address made of the pointer returned by the runtime call and the
/// alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits a taskwait: through OpenMPIRBuilder::CreateTaskwait when the module
/// provides an OpenMPIRBuilder, otherwise as a call to __kmpc_omp_taskwait.
/// Afterwards, when emitting inside a CGOpenMPRegionInfo, the region's untied
/// switch is emitted. Does nothing without an insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen in place, with an
/// InlinedOpenMPRegionRAII object alive for the duration of the emission.
/// Does nothing without an insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to its RTCancelKind value.
/// Any kind other than parallel, for and sections is asserted to be
/// taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a call to __kmpc_cancellationpoint followed by a conditional branch
/// to the cancel destination of the enclosing region.
///
/// Code is emitted only when \p CGF is inside a CGOpenMPRegionInfo and either
/// \p CancelRegion is taskgroup or the region reports hasCancel().
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: non-zero means exit from the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel followed by a conditional branch to the
/// cancel destination of the enclosing region.
///
/// Code is emitted only when \p CGF is inside a CGOpenMPRegionInfo. With a
/// non-null \p IfCond the sequence is emitted through emitIfClause with an
/// empty else part.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [&M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: non-zero means exit from the construct.
      llvm::Value *Result =
          CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                  M, OMPRTL___kmpc_cancel),
                              Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6337 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6338 6339 public: 6340 OMPUsesAllocatorsActionTy( 6341 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6342 : Allocators(Allocators) {} 6343 void Enter(CodeGenFunction &CGF) override { 6344 if (!CGF.HaveInsertPoint()) 6345 return; 6346 for (const auto &AllocatorData : Allocators) { 6347 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6348 CGF, AllocatorData.first, AllocatorData.second); 6349 } 6350 } 6351 void Exit(CodeGenFunction &CGF) override { 6352 if (!CGF.HaveInsertPoint()) 6353 return; 6354 for (const auto &AllocatorData : Allocators) { 6355 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6356 AllocatorData.first); 6357 } 6358 } 6359 }; 6360 } // namespace 6361 6362 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6363 const OMPExecutableDirective &D, StringRef ParentName, 6364 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6365 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6366 assert(!ParentName.empty() && "Invalid target region parent name!"); 6367 HasEmittedTargetRegion = true; 6368 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6369 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6370 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6371 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6372 if (!D.AllocatorTraits) 6373 continue; 6374 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6375 } 6376 } 6377 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6378 CodeGen.setAction(UsesAllocatorAction); 6379 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6380 IsOffloadEntry, CodeGen); 6381 } 6382 6383 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6384 const Expr *Allocator, 6385 const Expr *AllocatorTraits) { 6386 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6387 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6388 // Use default memspace handle. 6389 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6390 llvm::Value *NumTraits = llvm::ConstantInt::get( 6391 CGF.IntTy, cast<ConstantArrayType>( 6392 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6393 ->getSize() 6394 .getLimitedValue()); 6395 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6396 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6397 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6398 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6399 AllocatorTraitsLVal.getBaseInfo(), 6400 AllocatorTraitsLVal.getTBAAInfo()); 6401 llvm::Value *Traits = 6402 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6403 6404 llvm::Value *AllocatorVal = 6405 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 6406 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6407 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6408 // Store to allocator. 
6409 CGF.EmitVarDecl(*cast<VarDecl>( 6410 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6411 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6412 AllocatorVal = 6413 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6414 Allocator->getType(), Allocator->getExprLoc()); 6415 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6416 } 6417 6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6419 const Expr *Allocator) { 6420 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6421 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6422 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6423 llvm::Value *AllocatorVal = 6424 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6425 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6426 CGF.getContext().VoidPtrTy, 6427 Allocator->getExprLoc()); 6428 (void)CGF.EmitRuntimeCall( 6429 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 6430 CGM.getModule(), OMPRTL___kmpc_destroy_allocator), 6431 {ThreadId, AllocatorVal}); 6432 } 6433 6434 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6435 const OMPExecutableDirective &D, StringRef ParentName, 6436 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6437 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6438 // Create a unique name for the entry function using the source location 6439 // information of the current target region. The name will be something like: 6440 // 6441 // __omp_offloading_DD_FFFF_PP_lBB 6442 // 6443 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6444 // mangled name of the function that encloses the target region and BB is the 6445 // line number of the target region. 
6446 6447 unsigned DeviceID; 6448 unsigned FileID; 6449 unsigned Line; 6450 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6451 Line); 6452 SmallString<64> EntryFnName; 6453 { 6454 llvm::raw_svector_ostream OS(EntryFnName); 6455 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6456 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6457 } 6458 6459 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6460 6461 CodeGenFunction CGF(CGM, true); 6462 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6463 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6464 6465 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6466 6467 // If this target outline function is not an offload entry, we don't need to 6468 // register it. 6469 if (!IsOffloadEntry) 6470 return; 6471 6472 // The target region ID is used by the runtime library to identify the current 6473 // target region, so it only has to be unique and not necessarily point to 6474 // anything. It could be the pointer to the outlined function that implements 6475 // the target region, but we aren't using that so that the compiler doesn't 6476 // need to keep that, and could therefore inline the host function if proven 6477 // worthwhile during optimization. In the other hand, if emitting code for the 6478 // device, the ID has to be the function address so that it can retrieved from 6479 // the offloading entry and launched by the runtime library. We also mark the 6480 // outlined function to have external linkage in case we are emitting code for 6481 // the device, because these functions will be entry points to the device. 
6482 6483 if (CGM.getLangOpts().OpenMPIsDevice) { 6484 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6485 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6486 OutlinedFn->setDSOLocal(false); 6487 } else { 6488 std::string Name = getName({EntryFnName, "region_id"}); 6489 OutlinedFnID = new llvm::GlobalVariable( 6490 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6491 llvm::GlobalValue::WeakAnyLinkage, 6492 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6493 } 6494 6495 // Register the information for the entry associated with this target region. 6496 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6497 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6498 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6499 } 6500 6501 /// Checks if the expression is constant or does not have non-trivial function 6502 /// calls. 6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6504 // We can skip constant expressions. 6505 // We can skip expressions with trivial calls or simple expressions. 6506 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6507 !E->hasNonTrivialCall(Ctx)) && 6508 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6509 } 6510 6511 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6512 const Stmt *Body) { 6513 const Stmt *Child = Body->IgnoreContainers(); 6514 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6515 Child = nullptr; 6516 for (const Stmt *S : C->body()) { 6517 if (const auto *E = dyn_cast<Expr>(S)) { 6518 if (isTrivial(Ctx, E)) 6519 continue; 6520 } 6521 // Some of the statements can be ignored. 6522 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6523 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6524 continue; 6525 // Analyze declarations. 
6526 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6527 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6528 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6529 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6530 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6531 isa<UsingDirectiveDecl>(D) || 6532 isa<OMPDeclareReductionDecl>(D) || 6533 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6534 return true; 6535 const auto *VD = dyn_cast<VarDecl>(D); 6536 if (!VD) 6537 return false; 6538 return VD->isConstexpr() || 6539 ((VD->getType().isTrivialType(Ctx) || 6540 VD->getType()->isReferenceType()) && 6541 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6542 })) 6543 continue; 6544 } 6545 // Found multiple children - cannot get the one child only. 6546 if (Child) 6547 return nullptr; 6548 Child = S; 6549 } 6550 if (Child) 6551 Child = Child->IgnoreContainers(); 6552 } 6553 return Child; 6554 } 6555 6556 /// Emit the number of teams for a target directive. Inspect the num_teams 6557 /// clause associated with a teams construct combined or closely nested 6558 /// with the target directive. 6559 /// 6560 /// Emit a team of size one for directives such as 'target parallel' that 6561 /// have no associated teams construct. 6562 /// 6563 /// Otherwise, return nullptr. 
6564 static llvm::Value * 6565 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6566 const OMPExecutableDirective &D) { 6567 assert(!CGF.getLangOpts().OpenMPIsDevice && 6568 "Clauses associated with the teams directive expected to be emitted " 6569 "only for the host!"); 6570 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6571 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6572 "Expected target-based executable directive."); 6573 CGBuilderTy &Bld = CGF.Builder; 6574 switch (DirectiveKind) { 6575 case OMPD_target: { 6576 const auto *CS = D.getInnermostCapturedStmt(); 6577 const auto *Body = 6578 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6579 const Stmt *ChildStmt = 6580 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6581 if (const auto *NestedDir = 6582 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6583 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6584 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6585 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6586 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6587 const Expr *NumTeams = 6588 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6589 llvm::Value *NumTeamsVal = 6590 CGF.EmitScalarExpr(NumTeams, 6591 /*IgnoreResultAssign*/ true); 6592 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6593 /*isSigned=*/true); 6594 } 6595 return Bld.getInt32(0); 6596 } 6597 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6598 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6599 return Bld.getInt32(1); 6600 return Bld.getInt32(0); 6601 } 6602 return nullptr; 6603 } 6604 case OMPD_target_teams: 6605 case OMPD_target_teams_distribute: 6606 case OMPD_target_teams_distribute_simd: 6607 case OMPD_target_teams_distribute_parallel_for: 6608 case OMPD_target_teams_distribute_parallel_for_simd: { 6609 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6610 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6611 const Expr *NumTeams = 6612 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6613 llvm::Value *NumTeamsVal = 6614 CGF.EmitScalarExpr(NumTeams, 6615 /*IgnoreResultAssign*/ true); 6616 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6617 /*isSigned=*/true); 6618 } 6619 return Bld.getInt32(0); 6620 } 6621 case OMPD_target_parallel: 6622 case OMPD_target_parallel_for: 6623 case OMPD_target_parallel_for_simd: 6624 case OMPD_target_simd: 6625 return Bld.getInt32(1); 6626 case OMPD_parallel: 6627 case OMPD_for: 6628 case OMPD_parallel_for: 6629 case OMPD_parallel_master: 6630 case OMPD_parallel_sections: 6631 case OMPD_for_simd: 6632 case OMPD_parallel_for_simd: 6633 case OMPD_cancel: 6634 case OMPD_cancellation_point: 6635 case OMPD_ordered: 6636 case OMPD_threadprivate: 6637 case OMPD_allocate: 6638 case OMPD_task: 6639 case OMPD_simd: 6640 case OMPD_sections: 6641 case OMPD_section: 6642 case OMPD_single: 6643 case OMPD_master: 6644 case OMPD_critical: 6645 case OMPD_taskyield: 6646 case OMPD_barrier: 6647 case OMPD_taskwait: 6648 case OMPD_taskgroup: 6649 case OMPD_atomic: 6650 case OMPD_flush: 6651 case OMPD_depobj: 6652 case OMPD_scan: 6653 case OMPD_teams: 6654 case OMPD_target_data: 6655 case OMPD_target_exit_data: 6656 case OMPD_target_enter_data: 6657 case OMPD_distribute: 6658 case OMPD_distribute_simd: 6659 case OMPD_distribute_parallel_for: 6660 case OMPD_distribute_parallel_for_simd: 6661 case OMPD_teams_distribute: 6662 case OMPD_teams_distribute_simd: 6663 case OMPD_teams_distribute_parallel_for: 6664 case OMPD_teams_distribute_parallel_for_simd: 6665 case OMPD_target_update: 6666 case OMPD_declare_simd: 6667 case OMPD_declare_variant: 6668 case OMPD_begin_declare_variant: 6669 case OMPD_end_declare_variant: 6670 case OMPD_declare_target: 6671 case OMPD_end_declare_target: 6672 case OMPD_declare_reduction: 6673 case OMPD_declare_mapper: 6674 case OMPD_taskloop: 6675 case OMPD_taskloop_simd: 6676 case OMPD_master_taskloop: 
6677 case OMPD_master_taskloop_simd: 6678 case OMPD_parallel_master_taskloop: 6679 case OMPD_parallel_master_taskloop_simd: 6680 case OMPD_requires: 6681 case OMPD_unknown: 6682 break; 6683 default: 6684 break; 6685 } 6686 llvm_unreachable("Unexpected directive kind."); 6687 } 6688 6689 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6690 llvm::Value *DefaultThreadLimitVal) { 6691 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6692 CGF.getContext(), CS->getCapturedStmt()); 6693 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6694 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6695 llvm::Value *NumThreads = nullptr; 6696 llvm::Value *CondVal = nullptr; 6697 // Handle if clause. If if clause present, the number of threads is 6698 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6699 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6700 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6701 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6702 const OMPIfClause *IfClause = nullptr; 6703 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6704 if (C->getNameModifier() == OMPD_unknown || 6705 C->getNameModifier() == OMPD_parallel) { 6706 IfClause = C; 6707 break; 6708 } 6709 } 6710 if (IfClause) { 6711 const Expr *Cond = IfClause->getCondition(); 6712 bool Result; 6713 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6714 if (!Result) 6715 return CGF.Builder.getInt32(1); 6716 } else { 6717 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6718 if (const auto *PreInit = 6719 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6720 for (const auto *I : PreInit->decls()) { 6721 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6722 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6723 } else { 6724 CodeGenFunction::AutoVarEmission Emission = 6725 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6726 CGF.EmitAutoVarCleanups(Emission); 6727 } 6728 } 6729 } 6730 
CondVal = CGF.EvaluateExprAsBool(Cond); 6731 } 6732 } 6733 } 6734 // Check the value of num_threads clause iff if clause was not specified 6735 // or is not evaluated to false. 6736 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6737 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6738 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6739 const auto *NumThreadsClause = 6740 Dir->getSingleClause<OMPNumThreadsClause>(); 6741 CodeGenFunction::LexicalScope Scope( 6742 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6743 if (const auto *PreInit = 6744 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6745 for (const auto *I : PreInit->decls()) { 6746 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6747 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6748 } else { 6749 CodeGenFunction::AutoVarEmission Emission = 6750 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6751 CGF.EmitAutoVarCleanups(Emission); 6752 } 6753 } 6754 } 6755 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6756 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6757 /*isSigned=*/false); 6758 if (DefaultThreadLimitVal) 6759 NumThreads = CGF.Builder.CreateSelect( 6760 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6761 DefaultThreadLimitVal, NumThreads); 6762 } else { 6763 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6764 : CGF.Builder.getInt32(0); 6765 } 6766 // Process condition of the if clause. 6767 if (CondVal) { 6768 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6769 CGF.Builder.getInt32(1)); 6770 } 6771 return NumThreads; 6772 } 6773 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6774 return CGF.Builder.getInt32(1); 6775 return DefaultThreadLimitVal; 6776 } 6777 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6778 : CGF.Builder.getInt32(0); 6779 } 6780 6781 /// Emit the number of threads for a target directive. 
Inspect the 6782 /// thread_limit clause associated with a teams construct combined or closely 6783 /// nested with the target directive. 6784 /// 6785 /// Emit the num_threads clause for directives such as 'target parallel' that 6786 /// have no associated teams construct. 6787 /// 6788 /// Otherwise, return nullptr. 6789 static llvm::Value * 6790 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6791 const OMPExecutableDirective &D) { 6792 assert(!CGF.getLangOpts().OpenMPIsDevice && 6793 "Clauses associated with the teams directive expected to be emitted " 6794 "only for the host!"); 6795 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6796 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6797 "Expected target-based executable directive."); 6798 CGBuilderTy &Bld = CGF.Builder; 6799 llvm::Value *ThreadLimitVal = nullptr; 6800 llvm::Value *NumThreadsVal = nullptr; 6801 switch (DirectiveKind) { 6802 case OMPD_target: { 6803 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6804 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6805 return NumThreads; 6806 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6807 CGF.getContext(), CS->getCapturedStmt()); 6808 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6809 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6810 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6811 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6812 const auto *ThreadLimitClause = 6813 Dir->getSingleClause<OMPThreadLimitClause>(); 6814 CodeGenFunction::LexicalScope Scope( 6815 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6816 if (const auto *PreInit = 6817 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6818 for (const auto *I : PreInit->decls()) { 6819 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6820 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6821 } else { 6822 CodeGenFunction::AutoVarEmission Emission = 6823 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6824 CGF.EmitAutoVarCleanups(Emission); 6825 } 6826 } 6827 } 6828 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6829 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6830 ThreadLimitVal = 6831 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6832 } 6833 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6834 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6835 CS = Dir->getInnermostCapturedStmt(); 6836 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6837 CGF.getContext(), CS->getCapturedStmt()); 6838 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6839 } 6840 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6841 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6842 CS = Dir->getInnermostCapturedStmt(); 6843 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6844 return NumThreads; 6845 } 6846 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6847 return Bld.getInt32(1); 6848 } 6849 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6850 } 6851 case OMPD_target_teams: { 6852 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6853 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6854 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6855 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6856 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6857 ThreadLimitVal = 6858 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6859 } 6860 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6861 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6862 return NumThreads; 6863 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6864 CGF.getContext(), CS->getCapturedStmt()); 6865 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6866 if (Dir->getDirectiveKind() == OMPD_distribute) { 6867 CS = Dir->getInnermostCapturedStmt(); 6868 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6869 return NumThreads; 6870 } 6871 } 6872 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6873 } 6874 case OMPD_target_teams_distribute: 6875 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6876 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6877 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6878 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6879 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6880 ThreadLimitVal = 6881 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6882 } 6883 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6884 case OMPD_target_parallel: 6885 case OMPD_target_parallel_for: 6886 case OMPD_target_parallel_for_simd: 6887 case OMPD_target_teams_distribute_parallel_for: 6888 case OMPD_target_teams_distribute_parallel_for_simd: { 6889 llvm::Value *CondVal = nullptr; 6890 // Handle if clause. 
If if clause present, the number of threads is 6891 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6892 if (D.hasClausesOfKind<OMPIfClause>()) { 6893 const OMPIfClause *IfClause = nullptr; 6894 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6895 if (C->getNameModifier() == OMPD_unknown || 6896 C->getNameModifier() == OMPD_parallel) { 6897 IfClause = C; 6898 break; 6899 } 6900 } 6901 if (IfClause) { 6902 const Expr *Cond = IfClause->getCondition(); 6903 bool Result; 6904 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6905 if (!Result) 6906 return Bld.getInt32(1); 6907 } else { 6908 CodeGenFunction::RunCleanupsScope Scope(CGF); 6909 CondVal = CGF.EvaluateExprAsBool(Cond); 6910 } 6911 } 6912 } 6913 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6914 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6915 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6916 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6917 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6918 ThreadLimitVal = 6919 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6920 } 6921 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6922 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6923 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6924 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6925 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6926 NumThreadsVal = 6927 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6928 ThreadLimitVal = ThreadLimitVal 6929 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6930 ThreadLimitVal), 6931 NumThreadsVal, ThreadLimitVal) 6932 : NumThreadsVal; 6933 } 6934 if (!ThreadLimitVal) 6935 ThreadLimitVal = Bld.getInt32(0); 6936 if (CondVal) 6937 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6938 return ThreadLimitVal; 6939 } 6940 case OMPD_target_teams_distribute_simd: 6941 case OMPD_target_simd: 6942 return Bld.getInt32(1); 6943 case OMPD_parallel: 6944 case OMPD_for: 6945 case OMPD_parallel_for: 6946 case OMPD_parallel_master: 6947 case OMPD_parallel_sections: 6948 case OMPD_for_simd: 6949 case OMPD_parallel_for_simd: 6950 case OMPD_cancel: 6951 case OMPD_cancellation_point: 6952 case OMPD_ordered: 6953 case OMPD_threadprivate: 6954 case OMPD_allocate: 6955 case OMPD_task: 6956 case OMPD_simd: 6957 case OMPD_sections: 6958 case OMPD_section: 6959 case OMPD_single: 6960 case OMPD_master: 6961 case OMPD_critical: 6962 case OMPD_taskyield: 6963 case OMPD_barrier: 6964 case OMPD_taskwait: 6965 case OMPD_taskgroup: 6966 case OMPD_atomic: 6967 case OMPD_flush: 6968 case OMPD_depobj: 6969 case OMPD_scan: 6970 case OMPD_teams: 6971 case OMPD_target_data: 6972 case OMPD_target_exit_data: 6973 case OMPD_target_enter_data: 6974 case OMPD_distribute: 6975 case OMPD_distribute_simd: 6976 case OMPD_distribute_parallel_for: 6977 case OMPD_distribute_parallel_for_simd: 6978 case OMPD_teams_distribute: 6979 case OMPD_teams_distribute_simd: 6980 case OMPD_teams_distribute_parallel_for: 6981 case OMPD_teams_distribute_parallel_for_simd: 6982 case OMPD_target_update: 6983 case OMPD_declare_simd: 6984 case OMPD_declare_variant: 6985 case OMPD_begin_declare_variant: 6986 case OMPD_end_declare_variant: 6987 case OMPD_declare_target: 6988 case OMPD_end_declare_target: 6989 case OMPD_declare_reduction: 6990 case OMPD_declare_mapper: 6991 case OMPD_taskloop: 6992 case OMPD_taskloop_simd: 6993 case OMPD_master_taskloop: 6994 case OMPD_master_taskloop_simd: 6995 case 
OMPD_parallel_master_taskloop: 6996 case OMPD_parallel_master_taskloop_simd: 6997 case OMPD_requires: 6998 case OMPD_unknown: 6999 break; 7000 default: 7001 break; 7002 } 7003 llvm_unreachable("Unsupported directive kind."); 7004 } 7005 7006 namespace { 7007 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7008 7009 // Utility to handle information from clauses associated with a given 7010 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7011 // It provides a convenient interface to obtain the information and generate 7012 // code for that information. 7013 class MappableExprsHandler { 7014 public: 7015 /// Values for bit flags used to specify the mapping type for 7016 /// offloading. 7017 enum OpenMPOffloadMappingFlags : uint64_t { 7018 /// No flags 7019 OMP_MAP_NONE = 0x0, 7020 /// Allocate memory on the device and move data from host to device. 7021 OMP_MAP_TO = 0x01, 7022 /// Allocate memory on the device and move data from device to host. 7023 OMP_MAP_FROM = 0x02, 7024 /// Always perform the requested mapping action on the element, even 7025 /// if it was already mapped before. 7026 OMP_MAP_ALWAYS = 0x04, 7027 /// Delete the element from the device environment, ignoring the 7028 /// current reference count associated with the element. 7029 OMP_MAP_DELETE = 0x08, 7030 /// The element being mapped is a pointer-pointee pair; both the 7031 /// pointer and the pointee should be mapped. 7032 OMP_MAP_PTR_AND_OBJ = 0x10, 7033 /// This flags signals that the base address of an entry should be 7034 /// passed to the target kernel as an argument. 7035 OMP_MAP_TARGET_PARAM = 0x20, 7036 /// Signal that the runtime library has to return the device pointer 7037 /// in the current position for the data being mapped. Used when we have the 7038 /// use_device_ptr or use_device_addr clause. 7039 OMP_MAP_RETURN_PARAM = 0x40, 7040 /// This flag signals that the reference being passed is a pointer to 7041 /// private data. 
7042 OMP_MAP_PRIVATE = 0x80, 7043 /// Pass the element to the device by value. 7044 OMP_MAP_LITERAL = 0x100, 7045 /// Implicit map 7046 OMP_MAP_IMPLICIT = 0x200, 7047 /// Close is a hint to the runtime to allocate memory close to 7048 /// the target device. 7049 OMP_MAP_CLOSE = 0x400, 7050 /// The 16 MSBs of the flags indicate whether the entry is member of some 7051 /// struct/class. 7052 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7053 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7054 }; 7055 7056 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7057 static unsigned getFlagMemberOffset() { 7058 unsigned Offset = 0; 7059 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7060 Remain = Remain >> 1) 7061 Offset++; 7062 return Offset; 7063 } 7064 7065 /// Class that associates information with a base pointer to be passed to the 7066 /// runtime library. 7067 class BasePointerInfo { 7068 /// The base pointer. 7069 llvm::Value *Ptr = nullptr; 7070 /// The base declaration that refers to this device pointer, or null if 7071 /// there is none. 7072 const ValueDecl *DevPtrDecl = nullptr; 7073 7074 public: 7075 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7076 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7077 llvm::Value *operator*() const { return Ptr; } 7078 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7079 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7080 }; 7081 7082 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7083 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7084 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7085 7086 /// Map between a struct and the its lowest & highest elements which have been 7087 /// mapped. 
7088 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7089 /// HE(FieldIndex, Pointer)} 7090 struct StructRangeInfoTy { 7091 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7092 0, Address::invalid()}; 7093 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7094 0, Address::invalid()}; 7095 Address Base = Address::invalid(); 7096 }; 7097 7098 private: 7099 /// Kind that defines how a device pointer has to be returned. 7100 struct MapInfo { 7101 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7102 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7103 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7104 bool ReturnDevicePointer = false; 7105 bool IsImplicit = false; 7106 bool ForDeviceAddr = false; 7107 7108 MapInfo() = default; 7109 MapInfo( 7110 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7111 OpenMPMapClauseKind MapType, 7112 ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, 7113 bool IsImplicit, bool ForDeviceAddr = false) 7114 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7115 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7116 ForDeviceAddr(ForDeviceAddr) {} 7117 }; 7118 7119 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7120 /// member and there is no map information about it, then emission of that 7121 /// entry is deferred until the whole struct has been processed. 7122 struct DeferredDevicePtrEntryTy { 7123 const Expr *IE = nullptr; 7124 const ValueDecl *VD = nullptr; 7125 bool ForDeviceAddr = false; 7126 7127 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7128 bool ForDeviceAddr) 7129 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7130 }; 7131 7132 /// The target directive from where the mappable clauses were extracted. It 7133 /// is either a executable directive or a user-defined mapper directive. 
7134 llvm::PointerUnion<const OMPExecutableDirective *, 7135 const OMPDeclareMapperDecl *> 7136 CurDir; 7137 7138 /// Function the directive is being generated for. 7139 CodeGenFunction &CGF; 7140 7141 /// Set of all first private variables in the current directive. 7142 /// bool data is set to true if the variable is implicitly marked as 7143 /// firstprivate, false otherwise. 7144 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7145 7146 /// Map between device pointer declarations and their expression components. 7147 /// The key value for declarations in 'this' is null. 7148 llvm::DenseMap< 7149 const ValueDecl *, 7150 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7151 DevPointersMap; 7152 7153 llvm::Value *getExprTypeSize(const Expr *E) const { 7154 QualType ExprTy = E->getType().getCanonicalType(); 7155 7156 // Calculate the size for array shaping expression. 7157 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7158 llvm::Value *Size = 7159 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7160 for (const Expr *SE : OAE->getDimensions()) { 7161 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7162 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7163 CGF.getContext().getSizeType(), 7164 SE->getExprLoc()); 7165 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7166 } 7167 return Size; 7168 } 7169 7170 // Reference types are ignored for mapping purposes. 7171 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7172 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7173 7174 // Given that an array section is considered a built-in type, we need to 7175 // do the calculation based on the length of the section instead of relying 7176 // on CGF.getTypeSize(E->getType()). 
7177 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7178 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7179 OAE->getBase()->IgnoreParenImpCasts()) 7180 .getCanonicalType(); 7181 7182 // If there is no length associated with the expression and lower bound is 7183 // not specified too, that means we are using the whole length of the 7184 // base. 7185 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7186 !OAE->getLowerBound()) 7187 return CGF.getTypeSize(BaseTy); 7188 7189 llvm::Value *ElemSize; 7190 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7191 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7192 } else { 7193 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7194 assert(ATy && "Expecting array type if not a pointer type."); 7195 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7196 } 7197 7198 // If we don't have a length at this point, that is because we have an 7199 // array section with a single element. 
7200 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7201 return ElemSize; 7202 7203 if (const Expr *LenExpr = OAE->getLength()) { 7204 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7205 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7206 CGF.getContext().getSizeType(), 7207 LenExpr->getExprLoc()); 7208 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7209 } 7210 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7211 OAE->getLowerBound() && "expected array_section[lb:]."); 7212 // Size = sizetype - lb * elemtype; 7213 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7214 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7215 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7216 CGF.getContext().getSizeType(), 7217 OAE->getLowerBound()->getExprLoc()); 7218 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7219 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7220 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7221 LengthVal = CGF.Builder.CreateSelect( 7222 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7223 return LengthVal; 7224 } 7225 return CGF.getTypeSize(ExprTy); 7226 } 7227 7228 /// Return the corresponding bits for a given map clause modifier. Add 7229 /// a flag marking the map as a pointer if requested. Add a flag marking the 7230 /// map as the first one of a series of maps that relate to the same map 7231 /// expression. 7232 OpenMPOffloadMappingFlags getMapTypeBits( 7233 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7234 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7235 OpenMPOffloadMappingFlags Bits = 7236 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7237 switch (MapType) { 7238 case OMPC_MAP_alloc: 7239 case OMPC_MAP_release: 7240 // alloc and release is the default behavior in the runtime library, i.e. 
7241 // if we don't pass any bits alloc/release that is what the runtime is 7242 // going to do. Therefore, we don't need to signal anything for these two 7243 // type modifiers. 7244 break; 7245 case OMPC_MAP_to: 7246 Bits |= OMP_MAP_TO; 7247 break; 7248 case OMPC_MAP_from: 7249 Bits |= OMP_MAP_FROM; 7250 break; 7251 case OMPC_MAP_tofrom: 7252 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7253 break; 7254 case OMPC_MAP_delete: 7255 Bits |= OMP_MAP_DELETE; 7256 break; 7257 case OMPC_MAP_unknown: 7258 llvm_unreachable("Unexpected map type!"); 7259 } 7260 if (AddPtrFlag) 7261 Bits |= OMP_MAP_PTR_AND_OBJ; 7262 if (AddIsTargetParamFlag) 7263 Bits |= OMP_MAP_TARGET_PARAM; 7264 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7265 != MapModifiers.end()) 7266 Bits |= OMP_MAP_ALWAYS; 7267 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7268 != MapModifiers.end()) 7269 Bits |= OMP_MAP_CLOSE; 7270 return Bits; 7271 } 7272 7273 /// Return true if the provided expression is a final array section. A 7274 /// final array section, is one whose length can't be proved to be one. 7275 bool isFinalArraySectionExpression(const Expr *E) const { 7276 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7277 7278 // It is not an array section and therefore not a unity-size one. 7279 if (!OASE) 7280 return false; 7281 7282 // An array section with no colon always refer to a single element. 7283 if (OASE->getColonLoc().isInvalid()) 7284 return false; 7285 7286 const Expr *Length = OASE->getLength(); 7287 7288 // If we don't have a length we have to check if the array has size 1 7289 // for this dimension. Also, we should always expect a length if the 7290 // base type is pointer. 
7291 if (!Length) { 7292 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7293 OASE->getBase()->IgnoreParenImpCasts()) 7294 .getCanonicalType(); 7295 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7296 return ATy->getSize().getSExtValue() != 1; 7297 // If we don't have a constant dimension length, we have to consider 7298 // the current section as having any size, so it is not necessarily 7299 // unitary. If it happen to be unity size, that's user fault. 7300 return true; 7301 } 7302 7303 // Check if the length evaluates to 1. 7304 Expr::EvalResult Result; 7305 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7306 return true; // Can have more that size 1. 7307 7308 llvm::APSInt ConstLength = Result.Val.getInt(); 7309 return ConstLength.getSExtValue() != 1; 7310 } 7311 7312 /// Generate the base pointers, section pointers, sizes and map type 7313 /// bits for the provided map type, map modifier, and expression components. 7314 /// \a IsFirstComponent should be set to true if the provided set of 7315 /// components is the first associated with a capture. 7316 void generateInfoForComponentList( 7317 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7318 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7319 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7320 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7321 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7322 bool IsImplicit, bool ForDeviceAddr = false, 7323 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7324 OverlappedElements = llvm::None) const { 7325 // The following summarizes what has to be generated for each map and the 7326 // types below. The generated information is expressed in this order: 7327 // base pointer, section pointer, size, flags 7328 // (to add to the ones that come from the map type and modifier). 
7329 // 7330 // double d; 7331 // int i[100]; 7332 // float *p; 7333 // 7334 // struct S1 { 7335 // int i; 7336 // float f[50]; 7337 // } 7338 // struct S2 { 7339 // int i; 7340 // float f[50]; 7341 // S1 s; 7342 // double *p; 7343 // struct S2 *ps; 7344 // } 7345 // S2 s; 7346 // S2 *ps; 7347 // 7348 // map(d) 7349 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7350 // 7351 // map(i) 7352 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7353 // 7354 // map(i[1:23]) 7355 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7356 // 7357 // map(p) 7358 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7359 // 7360 // map(p[1:24]) 7361 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7362 // 7363 // map(s) 7364 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7365 // 7366 // map(s.i) 7367 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7368 // 7369 // map(s.s.f) 7370 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7371 // 7372 // map(s.p) 7373 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7374 // 7375 // map(to: s.p[:22]) 7376 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7377 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7378 // &(s.p), &(s.p[0]), 22*sizeof(double), 7379 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7380 // (*) alloc space for struct members, only this is a target parameter 7381 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7382 // optimizes this entry out, same in the examples below) 7383 // (***) map the pointee (map: to) 7384 // 7385 // map(s.ps) 7386 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7387 // 7388 // map(from: s.ps->s.i) 7389 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7390 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7391 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7392 // 7393 // map(to: s.ps->ps) 7394 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7395 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7396 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7397 // 7398 // map(s.ps->ps->ps) 7399 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7400 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7401 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7402 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7403 // 7404 // map(to: s.ps->ps->s.f[:22]) 7405 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7406 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7407 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7408 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7409 // 7410 // map(ps) 7411 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7412 // 7413 // map(ps->i) 7414 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7415 // 7416 // map(ps->s.f) 7417 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7418 // 7419 // map(from: ps->p) 7420 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7421 // 7422 // map(to: ps->p[:22]) 7423 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7424 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7425 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7426 // 7427 // map(ps->ps) 7428 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7429 // 7430 // map(from: ps->ps->s.i) 7431 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7432 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7433 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7434 // 7435 // map(from: ps->ps->ps) 7436 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7437 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7438 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7439 // 7440 // map(ps->ps->ps->ps) 7441 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7442 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7443 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7444 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7445 // 7446 // map(to: ps->ps->ps->s.f[:22]) 7447 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7448 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7449 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7450 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7451 // 7452 // map(to: s.f[:22]) map(from: s.p[:33]) 7453 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7454 // sizeof(double*) (**), TARGET_PARAM 7455 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7456 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7457 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7458 // (*) allocate contiguous space needed to fit all mapped members even if 7459 // we allocate space for members not mapped (in this example, 7460 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7461 // them as well because they fall between &s.f[0] and &s.p) 7462 // 7463 // map(from: s.f[:22]) map(to: ps->p[:33]) 7464 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7465 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7466 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7467 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7468 // (*) the struct this entry pertains to is the 2nd element in the list of 7469 // arguments, hence MEMBER_OF(2) 7470 // 7471 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7472 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7473 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7474 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7475 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7476 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7477 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7478 // (*) the struct this entry pertains to is the 4th element in the list 7479 // of arguments, hence MEMBER_OF(4) 7480 7481 // Track if the map information being generated is the first for a 
capture. 7482 bool IsCaptureFirstInfo = IsFirstComponentList; 7483 // When the variable is on a declare target link or in a to clause with 7484 // unified memory, a reference is needed to hold the host/device address 7485 // of the variable. 7486 bool RequiresReference = false; 7487 7488 // Scan the components from the base to the complete expression. 7489 auto CI = Components.rbegin(); 7490 auto CE = Components.rend(); 7491 auto I = CI; 7492 7493 // Track if the map information being generated is the first for a list of 7494 // components. 7495 bool IsExpressionFirstInfo = true; 7496 Address BP = Address::invalid(); 7497 const Expr *AssocExpr = I->getAssociatedExpression(); 7498 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7499 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7500 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7501 7502 if (isa<MemberExpr>(AssocExpr)) { 7503 // The base is the 'this' pointer. The content of the pointer is going 7504 // to be the base of the field being mapped. 7505 BP = CGF.LoadCXXThisAddress(); 7506 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7507 (OASE && 7508 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7509 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7510 } else if (OAShE && 7511 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7512 BP = Address( 7513 CGF.EmitScalarExpr(OAShE->getBase()), 7514 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7515 } else { 7516 // The base is the reference to the variable. 7517 // BP = &Var. 
7518 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7519 if (const auto *VD = 7520 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7521 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7523 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7524 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7525 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7526 RequiresReference = true; 7527 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7528 } 7529 } 7530 } 7531 7532 // If the variable is a pointer and is being dereferenced (i.e. is not 7533 // the last component), the base has to be the pointer itself, not its 7534 // reference. References are ignored for mapping purposes. 7535 QualType Ty = 7536 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7537 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7538 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7539 7540 // We do not need to generate individual map information for the 7541 // pointer, it can be associated with the combined storage. 7542 ++I; 7543 } 7544 } 7545 7546 // Track whether a component of the list should be marked as MEMBER_OF some 7547 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7548 // in a component list should be marked as MEMBER_OF, all subsequent entries 7549 // do not belong to the base struct. E.g. 7550 // struct S2 s; 7551 // s.ps->ps->ps->f[:] 7552 // (1) (2) (3) (4) 7553 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7554 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7555 // is the pointee of ps(2) which is not member of struct s, so it should not 7556 // be marked as such (it is still PTR_AND_OBJ). 7557 // The variable is initialized to false so that PTR_AND_OBJ entries which 7558 // are not struct members are not considered (e.g. 
array of pointers to 7559 // data). 7560 bool ShouldBeMemberOf = false; 7561 7562 // Variable keeping track of whether or not we have encountered a component 7563 // in the component list which is a member expression. Useful when we have a 7564 // pointer or a final array section, in which case it is the previous 7565 // component in the list which tells us whether we have a member expression. 7566 // E.g. X.f[:] 7567 // While processing the final array section "[:]" it is "f" which tells us 7568 // whether we are dealing with a member of a declared struct. 7569 const MemberExpr *EncounteredME = nullptr; 7570 7571 for (; I != CE; ++I) { 7572 // If the current component is member of a struct (parent struct) mark it. 7573 if (!EncounteredME) { 7574 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7575 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7576 // as MEMBER_OF the parent struct. 7577 if (EncounteredME) 7578 ShouldBeMemberOf = true; 7579 } 7580 7581 auto Next = std::next(I); 7582 7583 // We need to generate the addresses and sizes if this is the last 7584 // component, if the component is a pointer or if it is an array section 7585 // whose length can't be proved to be one. If this is a pointer, it 7586 // becomes the base address for the following components. 7587 7588 // A final array section, is one whose length can't be proved to be one. 7589 bool IsFinalArraySection = 7590 isFinalArraySectionExpression(I->getAssociatedExpression()); 7591 7592 // Get information on whether the element is a pointer. Have to do a 7593 // special treatment for array sections given that they are built-in 7594 // types. 
7595 const auto *OASE = 7596 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7597 const auto *OAShE = 7598 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7599 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7600 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7601 bool IsPointer = 7602 OAShE || 7603 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7604 .getCanonicalType() 7605 ->isAnyPointerType()) || 7606 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7607 bool IsNonDerefPointer = IsPointer && !UO && !BO; 7608 7609 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7610 // If this is not the last component, we expect the pointer to be 7611 // associated with an array expression or member expression. 7612 assert((Next == CE || 7613 isa<MemberExpr>(Next->getAssociatedExpression()) || 7614 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7615 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7616 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7617 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7618 "Unexpected expression"); 7619 7620 Address LB = Address::invalid(); 7621 if (OAShE) { 7622 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7623 CGF.getContext().getTypeAlignInChars( 7624 OAShE->getBase()->getType())); 7625 } else { 7626 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7627 .getAddress(CGF); 7628 } 7629 7630 // If this component is a pointer inside the base struct then we don't 7631 // need to create any entry for it - it will be combined with the object 7632 // it is pointing to into a single PTR_AND_OBJ entry. 7633 bool IsMemberPointerOrAddr = 7634 (IsPointer || ForDeviceAddr) && EncounteredME && 7635 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7636 EncounteredME); 7637 if (!OverlappedElements.empty()) { 7638 // Handle base element with the info for overlapped elements. 
7639 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7640 assert(Next == CE && 7641 "Expected last element for the overlapped elements."); 7642 assert(!IsPointer && 7643 "Unexpected base element with the pointer type."); 7644 // Mark the whole struct as the struct that requires allocation on the 7645 // device. 7646 PartialStruct.LowestElem = {0, LB}; 7647 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7648 I->getAssociatedExpression()->getType()); 7649 Address HB = CGF.Builder.CreateConstGEP( 7650 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7651 CGF.VoidPtrTy), 7652 TypeSize.getQuantity() - 1); 7653 PartialStruct.HighestElem = { 7654 std::numeric_limits<decltype( 7655 PartialStruct.HighestElem.first)>::max(), 7656 HB}; 7657 PartialStruct.Base = BP; 7658 // Emit data for non-overlapped data. 7659 OpenMPOffloadMappingFlags Flags = 7660 OMP_MAP_MEMBER_OF | 7661 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7662 /*AddPtrFlag=*/false, 7663 /*AddIsTargetParamFlag=*/false); 7664 LB = BP; 7665 llvm::Value *Size = nullptr; 7666 // Do bitcopy of all non-overlapped structure elements. 
7667 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7668 Component : OverlappedElements) { 7669 Address ComponentLB = Address::invalid(); 7670 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7671 Component) { 7672 if (MC.getAssociatedDeclaration()) { 7673 ComponentLB = 7674 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7675 .getAddress(CGF); 7676 Size = CGF.Builder.CreatePtrDiff( 7677 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7678 CGF.EmitCastToVoidPtr(LB.getPointer())); 7679 break; 7680 } 7681 } 7682 BasePointers.push_back(BP.getPointer()); 7683 Pointers.push_back(LB.getPointer()); 7684 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7685 /*isSigned=*/true)); 7686 Types.push_back(Flags); 7687 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7688 } 7689 BasePointers.push_back(BP.getPointer()); 7690 Pointers.push_back(LB.getPointer()); 7691 Size = CGF.Builder.CreatePtrDiff( 7692 CGF.EmitCastToVoidPtr( 7693 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7694 CGF.EmitCastToVoidPtr(LB.getPointer())); 7695 Sizes.push_back( 7696 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7697 Types.push_back(Flags); 7698 break; 7699 } 7700 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7701 if (!IsMemberPointerOrAddr) { 7702 BasePointers.push_back(BP.getPointer()); 7703 Pointers.push_back(LB.getPointer()); 7704 Sizes.push_back( 7705 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7706 7707 // We need to add a pointer flag for each map that comes from the 7708 // same expression except for the first one. We also need to signal 7709 // this map is the first one that relates with the current capture 7710 // (there is a set of entries for each capture). 
7711 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7712 MapType, MapModifiers, IsImplicit, 7713 !IsExpressionFirstInfo || RequiresReference, 7714 IsCaptureFirstInfo && !RequiresReference); 7715 7716 if (!IsExpressionFirstInfo) { 7717 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7718 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7719 if (IsPointer) 7720 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7721 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7722 7723 if (ShouldBeMemberOf) { 7724 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7725 // should be later updated with the correct value of MEMBER_OF. 7726 Flags |= OMP_MAP_MEMBER_OF; 7727 // From now on, all subsequent PTR_AND_OBJ entries should not be 7728 // marked as MEMBER_OF. 7729 ShouldBeMemberOf = false; 7730 } 7731 } 7732 7733 Types.push_back(Flags); 7734 } 7735 7736 // If we have encountered a member expression so far, keep track of the 7737 // mapped member. If the parent is "*this", then the value declaration 7738 // is nullptr. 7739 if (EncounteredME) { 7740 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7741 unsigned FieldIndex = FD->getFieldIndex(); 7742 7743 // Update info about the lowest and highest elements for this struct 7744 if (!PartialStruct.Base.isValid()) { 7745 PartialStruct.LowestElem = {FieldIndex, LB}; 7746 if (IsFinalArraySection) { 7747 Address HB = 7748 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7749 .getAddress(CGF); 7750 PartialStruct.HighestElem = {FieldIndex, HB}; 7751 } else { 7752 PartialStruct.HighestElem = {FieldIndex, LB}; 7753 } 7754 PartialStruct.Base = BP; 7755 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7756 PartialStruct.LowestElem = {FieldIndex, LB}; 7757 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7758 PartialStruct.HighestElem = {FieldIndex, LB}; 7759 } 7760 } 7761 7762 // If we have a final array section, we are done with this expression. 
7763 if (IsFinalArraySection) 7764 break; 7765 7766 // The pointer becomes the base for the next element. 7767 if (Next != CE) 7768 BP = LB; 7769 7770 IsExpressionFirstInfo = false; 7771 IsCaptureFirstInfo = false; 7772 } 7773 } 7774 } 7775 7776 /// Return the adjusted map modifiers if the declaration a capture refers to 7777 /// appears in a first-private clause. This is expected to be used only with 7778 /// directives that start with 'target'. 7779 MappableExprsHandler::OpenMPOffloadMappingFlags 7780 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7781 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7782 7783 // A first private variable captured by reference will use only the 7784 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7785 // declaration is known as first-private in this handler. 7786 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7787 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7788 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7789 return MappableExprsHandler::OMP_MAP_ALWAYS | 7790 MappableExprsHandler::OMP_MAP_TO; 7791 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7792 return MappableExprsHandler::OMP_MAP_TO | 7793 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7794 return MappableExprsHandler::OMP_MAP_PRIVATE | 7795 MappableExprsHandler::OMP_MAP_TO; 7796 } 7797 return MappableExprsHandler::OMP_MAP_TO | 7798 MappableExprsHandler::OMP_MAP_FROM; 7799 } 7800 7801 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7802 // Rotate by getFlagMemberOffset() bits. 
7803 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7804 << getFlagMemberOffset()); 7805 } 7806 7807 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7808 OpenMPOffloadMappingFlags MemberOfFlag) { 7809 // If the entry is PTR_AND_OBJ but has not been marked with the special 7810 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7811 // marked as MEMBER_OF. 7812 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7813 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7814 return; 7815 7816 // Reset the placeholder value to prepare the flag for the assignment of the 7817 // proper MEMBER_OF value. 7818 Flags &= ~OMP_MAP_MEMBER_OF; 7819 Flags |= MemberOfFlag; 7820 } 7821 7822 void getPlainLayout(const CXXRecordDecl *RD, 7823 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7824 bool AsBase) const { 7825 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7826 7827 llvm::StructType *St = 7828 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7829 7830 unsigned NumElements = St->getNumElements(); 7831 llvm::SmallVector< 7832 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7833 RecordLayout(NumElements); 7834 7835 // Fill bases. 7836 for (const auto &I : RD->bases()) { 7837 if (I.isVirtual()) 7838 continue; 7839 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7840 // Ignore empty bases. 7841 if (Base->isEmpty() || CGF.getContext() 7842 .getASTRecordLayout(Base) 7843 .getNonVirtualSize() 7844 .isZero()) 7845 continue; 7846 7847 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7848 RecordLayout[FieldIndex] = Base; 7849 } 7850 // Fill in virtual bases. 7851 for (const auto &I : RD->vbases()) { 7852 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7853 // Ignore empty bases. 
7854 if (Base->isEmpty()) 7855 continue; 7856 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7857 if (RecordLayout[FieldIndex]) 7858 continue; 7859 RecordLayout[FieldIndex] = Base; 7860 } 7861 // Fill in all the fields. 7862 assert(!RD->isUnion() && "Unexpected union."); 7863 for (const auto *Field : RD->fields()) { 7864 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7865 // will fill in later.) 7866 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7867 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7868 RecordLayout[FieldIndex] = Field; 7869 } 7870 } 7871 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7872 &Data : RecordLayout) { 7873 if (Data.isNull()) 7874 continue; 7875 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7876 getPlainLayout(Base, Layout, /*AsBase=*/true); 7877 else 7878 Layout.push_back(Data.get<const FieldDecl *>()); 7879 } 7880 } 7881 7882 public: 7883 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7884 : CurDir(&Dir), CGF(CGF) { 7885 // Extract firstprivate clause information. 7886 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7887 for (const auto *D : C->varlists()) 7888 FirstPrivateDecls.try_emplace( 7889 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7890 // Extract implicit firstprivates from uses_allocators clauses. 
7891 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7892 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7893 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7894 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 7895 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 7896 /*Implicit=*/true); 7897 else if (const auto *VD = dyn_cast<VarDecl>( 7898 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 7899 ->getDecl())) 7900 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 7901 } 7902 } 7903 // Extract device pointer clause information. 7904 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7905 for (auto L : C->component_lists()) 7906 DevPointersMap[L.first].push_back(L.second); 7907 } 7908 7909 /// Constructor for the declare mapper directive. 7910 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7911 : CurDir(&Dir), CGF(CGF) {} 7912 7913 /// Generate code for the combined entry if we have a partially mapped struct 7914 /// and take care of the mapping flags of the arguments corresponding to 7915 /// individual struct members. 
7916 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7917 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7918 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7919 const StructRangeInfoTy &PartialStruct) const { 7920 // Base is the base of the struct 7921 BasePointers.push_back(PartialStruct.Base.getPointer()); 7922 // Pointer is the address of the lowest element 7923 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7924 Pointers.push_back(LB); 7925 // Size is (addr of {highest+1} element) - (addr of lowest element) 7926 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7927 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7928 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7929 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7930 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7931 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7932 /*isSigned=*/false); 7933 Sizes.push_back(Size); 7934 // Map type is always TARGET_PARAM 7935 Types.push_back(OMP_MAP_TARGET_PARAM); 7936 // Remove TARGET_PARAM flag from the first element 7937 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7938 7939 // All other current entries will be MEMBER_OF the combined entry 7940 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7941 // 0xFFFF in the MEMBER_OF field). 7942 OpenMPOffloadMappingFlags MemberOfFlag = 7943 getMemberOfFlag(BasePointers.size() - 1); 7944 for (auto &M : CurTypes) 7945 setCorrectMemberOfFlag(M, MemberOfFlag); 7946 } 7947 7948 /// Generate all the base pointers, section pointers, sizes and map 7949 /// types for the extracted mappable expressions. Also, for each item that 7950 /// relates with a device pointer, a pair of the relevant declaration and 7951 /// index where it occurs is appended to the device pointers info array. 
7952 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7953 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7954 MapFlagsArrayTy &Types) const { 7955 // We have to process the component lists that relate with the same 7956 // declaration in a single chunk so that we can generate the map flags 7957 // correctly. Therefore, we organize all lists in a map. 7958 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7959 7960 // Helper function to fill the information map for the different supported 7961 // clauses. 7962 auto &&InfoGen = 7963 [&Info](const ValueDecl *D, 7964 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7965 OpenMPMapClauseKind MapType, 7966 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7967 bool ReturnDevicePointer, bool IsImplicit, 7968 bool ForDeviceAddr = false) { 7969 const ValueDecl *VD = 7970 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7971 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7972 IsImplicit, ForDeviceAddr); 7973 }; 7974 7975 assert(CurDir.is<const OMPExecutableDirective *>() && 7976 "Expect a executable directive"); 7977 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7978 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7979 for (const auto L : C->component_lists()) { 7980 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7981 /*ReturnDevicePointer=*/false, C->isImplicit()); 7982 } 7983 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7984 for (const auto L : C->component_lists()) { 7985 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7986 /*ReturnDevicePointer=*/false, C->isImplicit()); 7987 } 7988 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7989 for (const auto L : C->component_lists()) { 7990 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7991 /*ReturnDevicePointer=*/false, C->isImplicit()); 7992 } 7993 7994 // Look at the use_device_ptr clause 
information and mark the existing map 7995 // entries as such. If there is no map information for an entry in the 7996 // use_device_ptr list, we create one with map type 'alloc' and zero size 7997 // section. It is the user fault if that was not mapped before. If there is 7998 // no map information and the pointer is a struct member, then we defer the 7999 // emission of that entry until the whole struct has been processed. 8000 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8001 DeferredInfo; 8002 8003 for (const auto *C : 8004 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8005 for (const auto L : C->component_lists()) { 8006 assert(!L.second.empty() && "Not expecting empty list of components!"); 8007 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8008 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8009 const Expr *IE = L.second.back().getAssociatedExpression(); 8010 // If the first component is a member expression, we have to look into 8011 // 'this', which maps to null in the map of map information. Otherwise 8012 // look directly for the information. 8013 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8014 8015 // We potentially have map information for this declaration already. 8016 // Look for the first set of components that refer to it. 8017 if (It != Info.end()) { 8018 auto CI = std::find_if( 8019 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8020 return MI.Components.back().getAssociatedDeclaration() == VD; 8021 }); 8022 // If we found a map entry, signal that the pointer has to be returned 8023 // and move on to the next declaration. 8024 if (CI != It->second.end()) { 8025 CI->ReturnDevicePointer = true; 8026 continue; 8027 } 8028 } 8029 8030 // We didn't find any match in our map information - generate a zero 8031 // size array section - if the pointer is a struct member we defer this 8032 // action until the whole struct has been processed. 
8033 if (isa<MemberExpr>(IE)) { 8034 // Insert the pointer into Info to be processed by 8035 // generateInfoForComponentList. Because it is a member pointer 8036 // without a pointee, no entry will be generated for it, therefore 8037 // we need to generate one after the whole struct has been processed. 8038 // Nonetheless, generateInfoForComponentList must be called to take 8039 // the pointer into account for the calculation of the range of the 8040 // partial struct. 8041 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8042 /*ReturnDevicePointer=*/false, C->isImplicit()); 8043 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8044 } else { 8045 llvm::Value *Ptr = 8046 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8047 BasePointers.emplace_back(Ptr, VD); 8048 Pointers.push_back(Ptr); 8049 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8050 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8051 } 8052 } 8053 } 8054 8055 // Look at the use_device_addr clause information and mark the existing map 8056 // entries as such. If there is no map information for an entry in the 8057 // use_device_addr list, we create one with map type 'alloc' and zero size 8058 // section. It is the user fault if that was not mapped before. If there is 8059 // no map information and the pointer is a struct member, then we defer the 8060 // emission of that entry until the whole struct has been processed. 
8061 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8062 for (const auto *C : 8063 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { 8064 for (const auto L : C->component_lists()) { 8065 assert(!L.second.empty() && "Not expecting empty list of components!"); 8066 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8067 if (!Processed.insert(VD).second) 8068 continue; 8069 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8070 const Expr *IE = L.second.back().getAssociatedExpression(); 8071 // If the first component is a member expression, we have to look into 8072 // 'this', which maps to null in the map of map information. Otherwise 8073 // look directly for the information. 8074 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8075 8076 // We potentially have map information for this declaration already. 8077 // Look for the first set of components that refer to it. 8078 if (It != Info.end()) { 8079 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8080 return MI.Components.back().getAssociatedDeclaration() == VD; 8081 }); 8082 // If we found a map entry, signal that the pointer has to be returned 8083 // and move on to the next declaration. 8084 if (CI != It->second.end()) { 8085 CI->ReturnDevicePointer = true; 8086 continue; 8087 } 8088 } 8089 8090 // We didn't find any match in our map information - generate a zero 8091 // size array section - if the pointer is a struct member we defer this 8092 // action until the whole struct has been processed. 8093 if (isa<MemberExpr>(IE)) { 8094 // Insert the pointer into Info to be processed by 8095 // generateInfoForComponentList. Because it is a member pointer 8096 // without a pointee, no entry will be generated for it, therefore 8097 // we need to generate one after the whole struct has been processed. 
8098 // Nonetheless, generateInfoForComponentList must be called to take 8099 // the pointer into account for the calculation of the range of the 8100 // partial struct. 8101 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8102 /*ReturnDevicePointer=*/false, C->isImplicit(), 8103 /*ForDeviceAddr=*/true); 8104 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8105 } else { 8106 llvm::Value *Ptr; 8107 if (IE->isGLValue()) 8108 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8109 else 8110 Ptr = CGF.EmitScalarExpr(IE); 8111 BasePointers.emplace_back(Ptr, VD); 8112 Pointers.push_back(Ptr); 8113 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8114 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8115 } 8116 } 8117 } 8118 8119 for (const auto &M : Info) { 8120 // We need to know when we generate information for the first component 8121 // associated with a capture, because the mapping flags depend on it. 8122 bool IsFirstComponentList = true; 8123 8124 // Temporary versions of arrays 8125 MapBaseValuesArrayTy CurBasePointers; 8126 MapValuesArrayTy CurPointers; 8127 MapValuesArrayTy CurSizes; 8128 MapFlagsArrayTy CurTypes; 8129 StructRangeInfoTy PartialStruct; 8130 8131 for (const MapInfo &L : M.second) { 8132 assert(!L.Components.empty() && 8133 "Not expecting declaration with no component lists."); 8134 8135 // Remember the current base pointer index. 8136 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8137 generateInfoForComponentList( 8138 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8139 CurPointers, CurSizes, CurTypes, PartialStruct, 8140 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); 8141 8142 // If this entry relates with a device pointer, set the relevant 8143 // declaration and add the 'return pointer' flag. 
8144 if (L.ReturnDevicePointer) { 8145 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8146 "Unexpected number of mapped base pointers."); 8147 8148 const ValueDecl *RelevantVD = 8149 L.Components.back().getAssociatedDeclaration(); 8150 assert(RelevantVD && 8151 "No relevant declaration related with device pointer??"); 8152 8153 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8154 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8155 } 8156 IsFirstComponentList = false; 8157 } 8158 8159 // Append any pending zero-length pointers which are struct members and 8160 // used with use_device_ptr or use_device_addr. 8161 auto CI = DeferredInfo.find(M.first); 8162 if (CI != DeferredInfo.end()) { 8163 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8164 llvm::Value *BasePtr; 8165 llvm::Value *Ptr; 8166 if (L.ForDeviceAddr) { 8167 if (L.IE->isGLValue()) 8168 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8169 else 8170 Ptr = this->CGF.EmitScalarExpr(L.IE); 8171 BasePtr = Ptr; 8172 // Entry is RETURN_PARAM. Also, set the placeholder value 8173 // MEMBER_OF=FFFF so that the entry is later updated with the 8174 // correct value of MEMBER_OF. 8175 CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8176 } else { 8177 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8178 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8179 L.IE->getExprLoc()); 8180 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8181 // value MEMBER_OF=FFFF so that the entry is later updated with the 8182 // correct value of MEMBER_OF. 
8183 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8184 OMP_MAP_MEMBER_OF); 8185 } 8186 CurBasePointers.emplace_back(BasePtr, L.VD); 8187 CurPointers.push_back(Ptr); 8188 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8189 } 8190 } 8191 8192 // If there is an entry in PartialStruct it means we have a struct with 8193 // individual members mapped. Emit an extra combined entry. 8194 if (PartialStruct.Base.isValid()) 8195 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8196 PartialStruct); 8197 8198 // We need to append the results of this capture to what we already have. 8199 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8200 Pointers.append(CurPointers.begin(), CurPointers.end()); 8201 Sizes.append(CurSizes.begin(), CurSizes.end()); 8202 Types.append(CurTypes.begin(), CurTypes.end()); 8203 } 8204 } 8205 8206 /// Generate all the base pointers, section pointers, sizes and map types for 8207 /// the extracted map clauses of user-defined mapper. 8208 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8209 MapValuesArrayTy &Pointers, 8210 MapValuesArrayTy &Sizes, 8211 MapFlagsArrayTy &Types) const { 8212 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8213 "Expect a declare mapper directive"); 8214 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8215 // We have to process the component lists that relate with the same 8216 // declaration in a single chunk so that we can generate the map flags 8217 // correctly. Therefore, we organize all lists in a map. 8218 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8219 8220 // Helper function to fill the information map for the different supported 8221 // clauses. 
8222 auto &&InfoGen = [&Info]( 8223 const ValueDecl *D, 8224 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8225 OpenMPMapClauseKind MapType, 8226 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8227 bool ReturnDevicePointer, bool IsImplicit) { 8228 const ValueDecl *VD = 8229 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8230 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8231 IsImplicit); 8232 }; 8233 8234 for (const auto *C : CurMapperDir->clauselists()) { 8235 const auto *MC = cast<OMPMapClause>(C); 8236 for (const auto L : MC->component_lists()) { 8237 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8238 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8239 } 8240 } 8241 8242 for (const auto &M : Info) { 8243 // We need to know when we generate information for the first component 8244 // associated with a capture, because the mapping flags depend on it. 8245 bool IsFirstComponentList = true; 8246 8247 // Temporary versions of arrays 8248 MapBaseValuesArrayTy CurBasePointers; 8249 MapValuesArrayTy CurPointers; 8250 MapValuesArrayTy CurSizes; 8251 MapFlagsArrayTy CurTypes; 8252 StructRangeInfoTy PartialStruct; 8253 8254 for (const MapInfo &L : M.second) { 8255 assert(!L.Components.empty() && 8256 "Not expecting declaration with no component lists."); 8257 generateInfoForComponentList( 8258 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8259 CurPointers, CurSizes, CurTypes, PartialStruct, 8260 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); 8261 IsFirstComponentList = false; 8262 } 8263 8264 // If there is an entry in PartialStruct it means we have a struct with 8265 // individual members mapped. Emit an extra combined entry. 8266 if (PartialStruct.Base.isValid()) 8267 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8268 PartialStruct); 8269 8270 // We need to append the results of this capture to what we already have. 
8271 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8272 Pointers.append(CurPointers.begin(), CurPointers.end()); 8273 Sizes.append(CurSizes.begin(), CurSizes.end()); 8274 Types.append(CurTypes.begin(), CurTypes.end()); 8275 } 8276 } 8277 8278 /// Emit capture info for lambdas for variables captured by reference. 8279 void generateInfoForLambdaCaptures( 8280 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8281 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8282 MapFlagsArrayTy &Types, 8283 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8284 const auto *RD = VD->getType() 8285 .getCanonicalType() 8286 .getNonReferenceType() 8287 ->getAsCXXRecordDecl(); 8288 if (!RD || !RD->isLambda()) 8289 return; 8290 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8291 LValue VDLVal = CGF.MakeAddrLValue( 8292 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8293 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8294 FieldDecl *ThisCapture = nullptr; 8295 RD->getCaptureFields(Captures, ThisCapture); 8296 if (ThisCapture) { 8297 LValue ThisLVal = 8298 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8299 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8300 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8301 VDLVal.getPointer(CGF)); 8302 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8303 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8304 Sizes.push_back( 8305 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8306 CGF.Int64Ty, /*isSigned=*/true)); 8307 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8308 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8309 } 8310 for (const LambdaCapture &LC : RD->captures()) { 8311 if (!LC.capturesVariable()) 8312 continue; 8313 const VarDecl *VD = LC.getCapturedVar(); 8314 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8315 continue; 8316 auto It = 
Captures.find(VD); 8317 assert(It != Captures.end() && "Found lambda capture without field."); 8318 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8319 if (LC.getCaptureKind() == LCK_ByRef) { 8320 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8321 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8322 VDLVal.getPointer(CGF)); 8323 BasePointers.push_back(VarLVal.getPointer(CGF)); 8324 Pointers.push_back(VarLValVal.getPointer(CGF)); 8325 Sizes.push_back(CGF.Builder.CreateIntCast( 8326 CGF.getTypeSize( 8327 VD->getType().getCanonicalType().getNonReferenceType()), 8328 CGF.Int64Ty, /*isSigned=*/true)); 8329 } else { 8330 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8331 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8332 VDLVal.getPointer(CGF)); 8333 BasePointers.push_back(VarLVal.getPointer(CGF)); 8334 Pointers.push_back(VarRVal.getScalarVal()); 8335 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8336 } 8337 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8338 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8339 } 8340 } 8341 8342 /// Set correct indices for lambdas captures. 8343 void adjustMemberOfForLambdaCaptures( 8344 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8345 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8346 MapFlagsArrayTy &Types) const { 8347 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8348 // Set correct member_of idx for all implicit lambda captures. 
8349 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8350 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8351 continue; 8352 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8353 assert(BasePtr && "Unable to find base lambda address."); 8354 int TgtIdx = -1; 8355 for (unsigned J = I; J > 0; --J) { 8356 unsigned Idx = J - 1; 8357 if (Pointers[Idx] != BasePtr) 8358 continue; 8359 TgtIdx = Idx; 8360 break; 8361 } 8362 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8363 // All other current entries will be MEMBER_OF the combined entry 8364 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8365 // 0xFFFF in the MEMBER_OF field). 8366 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8367 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8368 } 8369 } 8370 8371 /// Generate the base pointers, section pointers, sizes and map types 8372 /// associated to a given capture. 8373 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8374 llvm::Value *Arg, 8375 MapBaseValuesArrayTy &BasePointers, 8376 MapValuesArrayTy &Pointers, 8377 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8378 StructRangeInfoTy &PartialStruct) const { 8379 assert(!Cap->capturesVariableArrayType() && 8380 "Not expecting to generate map info for a variable array type!"); 8381 8382 // We need to know when we generating information for the first component 8383 const ValueDecl *VD = Cap->capturesThis() 8384 ? nullptr 8385 : Cap->getCapturedVar()->getCanonicalDecl(); 8386 8387 // If this declaration appears in a is_device_ptr clause we just have to 8388 // pass the pointer by value. If it is a reference to a declaration, we just 8389 // pass its value. 
8390 if (DevPointersMap.count(VD)) { 8391 BasePointers.emplace_back(Arg, VD); 8392 Pointers.push_back(Arg); 8393 Sizes.push_back( 8394 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8395 CGF.Int64Ty, /*isSigned=*/true)); 8396 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8397 return; 8398 } 8399 8400 using MapData = 8401 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8402 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8403 SmallVector<MapData, 4> DeclComponentLists; 8404 assert(CurDir.is<const OMPExecutableDirective *>() && 8405 "Expect a executable directive"); 8406 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8407 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8408 for (const auto L : C->decl_component_lists(VD)) { 8409 assert(L.first == VD && 8410 "We got information for the wrong declaration??"); 8411 assert(!L.second.empty() && 8412 "Not expecting declaration with no component lists."); 8413 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8414 C->getMapTypeModifiers(), 8415 C->isImplicit()); 8416 } 8417 } 8418 8419 // Find overlapping elements (including the offset from the base element). 
8420 llvm::SmallDenseMap< 8421 const MapData *, 8422 llvm::SmallVector< 8423 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8424 4> 8425 OverlappedData; 8426 size_t Count = 0; 8427 for (const MapData &L : DeclComponentLists) { 8428 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8429 OpenMPMapClauseKind MapType; 8430 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8431 bool IsImplicit; 8432 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8433 ++Count; 8434 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8435 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8436 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8437 auto CI = Components.rbegin(); 8438 auto CE = Components.rend(); 8439 auto SI = Components1.rbegin(); 8440 auto SE = Components1.rend(); 8441 for (; CI != CE && SI != SE; ++CI, ++SI) { 8442 if (CI->getAssociatedExpression()->getStmtClass() != 8443 SI->getAssociatedExpression()->getStmtClass()) 8444 break; 8445 // Are we dealing with different variables/fields? 8446 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8447 break; 8448 } 8449 // Found overlapping if, at least for one component, reached the head of 8450 // the components list. 8451 if (CI == CE || SI == SE) { 8452 assert((CI != CE || SI != SE) && 8453 "Unexpected full match of the mapping components."); 8454 const MapData &BaseData = CI == CE ? L : L1; 8455 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8456 SI == SE ? Components : Components1; 8457 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8458 OverlappedElements.getSecond().push_back(SubData); 8459 } 8460 } 8461 } 8462 // Sort the overlapped elements for each item. 
8463 llvm::SmallVector<const FieldDecl *, 4> Layout; 8464 if (!OverlappedData.empty()) { 8465 if (const auto *CRD = 8466 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8467 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8468 else { 8469 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8470 Layout.append(RD->field_begin(), RD->field_end()); 8471 } 8472 } 8473 for (auto &Pair : OverlappedData) { 8474 llvm::sort( 8475 Pair.getSecond(), 8476 [&Layout]( 8477 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8478 OMPClauseMappableExprCommon::MappableExprComponentListRef 8479 Second) { 8480 auto CI = First.rbegin(); 8481 auto CE = First.rend(); 8482 auto SI = Second.rbegin(); 8483 auto SE = Second.rend(); 8484 for (; CI != CE && SI != SE; ++CI, ++SI) { 8485 if (CI->getAssociatedExpression()->getStmtClass() != 8486 SI->getAssociatedExpression()->getStmtClass()) 8487 break; 8488 // Are we dealing with different variables/fields? 8489 if (CI->getAssociatedDeclaration() != 8490 SI->getAssociatedDeclaration()) 8491 break; 8492 } 8493 8494 // Lists contain the same elements. 8495 if (CI == CE && SI == SE) 8496 return false; 8497 8498 // List with less elements is less than list with more elements. 8499 if (CI == CE || SI == SE) 8500 return CI == CE; 8501 8502 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8503 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8504 if (FD1->getParent() == FD2->getParent()) 8505 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8506 const auto It = 8507 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8508 return FD == FD1 || FD == FD2; 8509 }); 8510 return *It == FD1; 8511 }); 8512 } 8513 8514 // Associated with a capture, because the mapping flags depend on it. 8515 // Go through all of the elements with the overlapped elements. 
8516 for (const auto &Pair : OverlappedData) { 8517 const MapData &L = *Pair.getFirst(); 8518 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8519 OpenMPMapClauseKind MapType; 8520 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8521 bool IsImplicit; 8522 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8523 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8524 OverlappedComponents = Pair.getSecond(); 8525 bool IsFirstComponentList = true; 8526 generateInfoForComponentList( 8527 MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, 8528 Types, PartialStruct, IsFirstComponentList, IsImplicit, 8529 /*ForDeviceAddr=*/false, OverlappedComponents); 8530 } 8531 // Go through other elements without overlapped elements. 8532 bool IsFirstComponentList = OverlappedData.empty(); 8533 for (const MapData &L : DeclComponentLists) { 8534 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8535 OpenMPMapClauseKind MapType; 8536 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8537 bool IsImplicit; 8538 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8539 auto It = OverlappedData.find(&L); 8540 if (It == OverlappedData.end()) 8541 generateInfoForComponentList(MapType, MapModifiers, Components, 8542 BasePointers, Pointers, Sizes, Types, 8543 PartialStruct, IsFirstComponentList, 8544 IsImplicit); 8545 IsFirstComponentList = false; 8546 } 8547 } 8548 8549 /// Generate the base pointers, section pointers, sizes and map types 8550 /// associated with the declare target link variables. 
8551 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8552 MapValuesArrayTy &Pointers, 8553 MapValuesArrayTy &Sizes, 8554 MapFlagsArrayTy &Types) const { 8555 assert(CurDir.is<const OMPExecutableDirective *>() && 8556 "Expect a executable directive"); 8557 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8558 // Map other list items in the map clause which are not captured variables 8559 // but "declare target link" global variables. 8560 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8561 for (const auto L : C->component_lists()) { 8562 if (!L.first) 8563 continue; 8564 const auto *VD = dyn_cast<VarDecl>(L.first); 8565 if (!VD) 8566 continue; 8567 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8568 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8569 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8570 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8571 continue; 8572 StructRangeInfoTy PartialStruct; 8573 generateInfoForComponentList( 8574 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8575 Pointers, Sizes, Types, PartialStruct, 8576 /*IsFirstComponentList=*/true, C->isImplicit()); 8577 assert(!PartialStruct.Base.isValid() && 8578 "No partial structs for declare target link expected."); 8579 } 8580 } 8581 } 8582 8583 /// Generate the default map information for a given capture \a CI, 8584 /// record field declaration \a RI and captured value \a CV. 8585 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8586 const FieldDecl &RI, llvm::Value *CV, 8587 MapBaseValuesArrayTy &CurBasePointers, 8588 MapValuesArrayTy &CurPointers, 8589 MapValuesArrayTy &CurSizes, 8590 MapFlagsArrayTy &CurMapTypes) const { 8591 bool IsImplicit = true; 8592 // Do the default mapping. 
8593 if (CI.capturesThis()) { 8594 CurBasePointers.push_back(CV); 8595 CurPointers.push_back(CV); 8596 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8597 CurSizes.push_back( 8598 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8599 CGF.Int64Ty, /*isSigned=*/true)); 8600 // Default map type. 8601 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8602 } else if (CI.capturesVariableByCopy()) { 8603 CurBasePointers.push_back(CV); 8604 CurPointers.push_back(CV); 8605 if (!RI.getType()->isAnyPointerType()) { 8606 // We have to signal to the runtime captures passed by value that are 8607 // not pointers. 8608 CurMapTypes.push_back(OMP_MAP_LITERAL); 8609 CurSizes.push_back(CGF.Builder.CreateIntCast( 8610 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8611 } else { 8612 // Pointers are implicitly mapped with a zero size and no flags 8613 // (other than first map that is added for all implicit maps). 8614 CurMapTypes.push_back(OMP_MAP_NONE); 8615 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8616 } 8617 const VarDecl *VD = CI.getCapturedVar(); 8618 auto I = FirstPrivateDecls.find(VD); 8619 if (I != FirstPrivateDecls.end()) 8620 IsImplicit = I->getSecond(); 8621 } else { 8622 assert(CI.capturesVariable() && "Expected captured reference."); 8623 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8624 QualType ElementType = PtrTy->getPointeeType(); 8625 CurSizes.push_back(CGF.Builder.CreateIntCast( 8626 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8627 // The default map type for a scalar/complex type is 'to' because by 8628 // default the value doesn't have to be retrieved. For an aggregate 8629 // type, the default is 'tofrom'. 
8630 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8631 const VarDecl *VD = CI.getCapturedVar(); 8632 auto I = FirstPrivateDecls.find(VD); 8633 if (I != FirstPrivateDecls.end() && 8634 VD->getType().isConstant(CGF.getContext())) { 8635 llvm::Constant *Addr = 8636 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8637 // Copy the value of the original variable to the new global copy. 8638 CGF.Builder.CreateMemCpy( 8639 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8640 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8641 CurSizes.back(), /*IsVolatile=*/false); 8642 // Use new global variable as the base pointers. 8643 CurBasePointers.push_back(Addr); 8644 CurPointers.push_back(Addr); 8645 } else { 8646 CurBasePointers.push_back(CV); 8647 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8648 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8649 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8650 AlignmentSource::Decl)); 8651 CurPointers.push_back(PtrAddr.getPointer()); 8652 } else { 8653 CurPointers.push_back(CV); 8654 } 8655 } 8656 if (I != FirstPrivateDecls.end()) 8657 IsImplicit = I->getSecond(); 8658 } 8659 // Every default map produces a single argument which is a target parameter. 8660 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8661 8662 // Add flag stating this is an implicit map. 8663 if (IsImplicit) 8664 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8665 } 8666 }; 8667 } // anonymous namespace 8668 8669 /// Emit the arrays used to pass the captures and map information to the 8670 /// offloading runtime library. If there is no map or capture information, 8671 /// return nullptr by reference. 
8672 static void 8673 emitOffloadingArrays(CodeGenFunction &CGF, 8674 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8675 MappableExprsHandler::MapValuesArrayTy &Pointers, 8676 MappableExprsHandler::MapValuesArrayTy &Sizes, 8677 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8678 CGOpenMPRuntime::TargetDataInfo &Info) { 8679 CodeGenModule &CGM = CGF.CGM; 8680 ASTContext &Ctx = CGF.getContext(); 8681 8682 // Reset the array information. 8683 Info.clearArrayInfo(); 8684 Info.NumberOfPtrs = BasePointers.size(); 8685 8686 if (Info.NumberOfPtrs) { 8687 // Detect if we have any capture size requiring runtime evaluation of the 8688 // size so that a constant array could be eventually used. 8689 bool hasRuntimeEvaluationCaptureSize = false; 8690 for (llvm::Value *S : Sizes) 8691 if (!isa<llvm::Constant>(S)) { 8692 hasRuntimeEvaluationCaptureSize = true; 8693 break; 8694 } 8695 8696 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8697 QualType PointerArrayType = Ctx.getConstantArrayType( 8698 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8699 /*IndexTypeQuals=*/0); 8700 8701 Info.BasePointersArray = 8702 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8703 Info.PointersArray = 8704 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8705 8706 // If we don't have any VLA types or other types that require runtime 8707 // evaluation, we can use a constant array for the map sizes, otherwise we 8708 // need to fill up the arrays as we do for the pointers. 
8709 QualType Int64Ty = 8710 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8711 if (hasRuntimeEvaluationCaptureSize) { 8712 QualType SizeArrayType = Ctx.getConstantArrayType( 8713 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8714 /*IndexTypeQuals=*/0); 8715 Info.SizesArray = 8716 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8717 } else { 8718 // We expect all the sizes to be constant, so we collect them to create 8719 // a constant array. 8720 SmallVector<llvm::Constant *, 16> ConstSizes; 8721 for (llvm::Value *S : Sizes) 8722 ConstSizes.push_back(cast<llvm::Constant>(S)); 8723 8724 auto *SizesArrayInit = llvm::ConstantArray::get( 8725 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8726 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8727 auto *SizesArrayGbl = new llvm::GlobalVariable( 8728 CGM.getModule(), SizesArrayInit->getType(), 8729 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8730 SizesArrayInit, Name); 8731 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8732 Info.SizesArray = SizesArrayGbl; 8733 } 8734 8735 // The map types are always constant so we don't need to generate code to 8736 // fill arrays. Instead, we create an array constant. 
8737 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8738 llvm::copy(MapTypes, Mapping.begin()); 8739 llvm::Constant *MapTypesArrayInit = 8740 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8741 std::string MaptypesName = 8742 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8743 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8744 CGM.getModule(), MapTypesArrayInit->getType(), 8745 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8746 MapTypesArrayInit, MaptypesName); 8747 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8748 Info.MapTypesArray = MapTypesArrayGbl; 8749 8750 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8751 llvm::Value *BPVal = *BasePointers[I]; 8752 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8753 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8754 Info.BasePointersArray, 0, I); 8755 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8756 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8757 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8758 CGF.Builder.CreateStore(BPVal, BPAddr); 8759 8760 if (Info.requiresDevicePointerInfo()) 8761 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8762 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8763 8764 llvm::Value *PVal = Pointers[I]; 8765 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8766 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8767 Info.PointersArray, 0, I); 8768 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8769 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8770 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8771 CGF.Builder.CreateStore(PVal, PAddr); 8772 8773 if (hasRuntimeEvaluationCaptureSize) { 8774 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8775 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8776 Info.SizesArray, 8777 /*Idx0=*/0, 8778 /*Idx1=*/I); 8779 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8780 CGF.Builder.CreateStore( 8781 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8782 SAddr); 8783 } 8784 } 8785 } 8786 } 8787 8788 /// Emit the arguments to be passed to the runtime library based on the 8789 /// arrays of pointers, sizes and map types. 8790 static void emitOffloadingArraysArgument( 8791 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8792 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8793 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8794 CodeGenModule &CGM = CGF.CGM; 8795 if (Info.NumberOfPtrs) { 8796 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8797 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8798 Info.BasePointersArray, 8799 /*Idx0=*/0, /*Idx1=*/0); 8800 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8801 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8802 Info.PointersArray, 8803 /*Idx0=*/0, 8804 /*Idx1=*/0); 8805 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8806 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8807 /*Idx0=*/0, /*Idx1=*/0); 8808 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8809 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8810 Info.MapTypesArray, 8811 /*Idx0=*/0, 8812 /*Idx1=*/0); 8813 } else { 8814 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8815 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8816 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8817 MapTypesArrayArg = 8818 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8819 } 8820 } 8821 8822 /// Check for inner distribute directive. 
8823 static const OMPExecutableDirective * 8824 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8825 const auto *CS = D.getInnermostCapturedStmt(); 8826 const auto *Body = 8827 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8828 const Stmt *ChildStmt = 8829 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8830 8831 if (const auto *NestedDir = 8832 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8833 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8834 switch (D.getDirectiveKind()) { 8835 case OMPD_target: 8836 if (isOpenMPDistributeDirective(DKind)) 8837 return NestedDir; 8838 if (DKind == OMPD_teams) { 8839 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8840 /*IgnoreCaptured=*/true); 8841 if (!Body) 8842 return nullptr; 8843 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8844 if (const auto *NND = 8845 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8846 DKind = NND->getDirectiveKind(); 8847 if (isOpenMPDistributeDirective(DKind)) 8848 return NND; 8849 } 8850 } 8851 return nullptr; 8852 case OMPD_target_teams: 8853 if (isOpenMPDistributeDirective(DKind)) 8854 return NestedDir; 8855 return nullptr; 8856 case OMPD_target_parallel: 8857 case OMPD_target_simd: 8858 case OMPD_target_parallel_for: 8859 case OMPD_target_parallel_for_simd: 8860 return nullptr; 8861 case OMPD_target_teams_distribute: 8862 case OMPD_target_teams_distribute_simd: 8863 case OMPD_target_teams_distribute_parallel_for: 8864 case OMPD_target_teams_distribute_parallel_for_simd: 8865 case OMPD_parallel: 8866 case OMPD_for: 8867 case OMPD_parallel_for: 8868 case OMPD_parallel_master: 8869 case OMPD_parallel_sections: 8870 case OMPD_for_simd: 8871 case OMPD_parallel_for_simd: 8872 case OMPD_cancel: 8873 case OMPD_cancellation_point: 8874 case OMPD_ordered: 8875 case OMPD_threadprivate: 8876 case OMPD_allocate: 8877 case OMPD_task: 8878 case OMPD_simd: 8879 case OMPD_sections: 8880 
case OMPD_section: 8881 case OMPD_single: 8882 case OMPD_master: 8883 case OMPD_critical: 8884 case OMPD_taskyield: 8885 case OMPD_barrier: 8886 case OMPD_taskwait: 8887 case OMPD_taskgroup: 8888 case OMPD_atomic: 8889 case OMPD_flush: 8890 case OMPD_depobj: 8891 case OMPD_scan: 8892 case OMPD_teams: 8893 case OMPD_target_data: 8894 case OMPD_target_exit_data: 8895 case OMPD_target_enter_data: 8896 case OMPD_distribute: 8897 case OMPD_distribute_simd: 8898 case OMPD_distribute_parallel_for: 8899 case OMPD_distribute_parallel_for_simd: 8900 case OMPD_teams_distribute: 8901 case OMPD_teams_distribute_simd: 8902 case OMPD_teams_distribute_parallel_for: 8903 case OMPD_teams_distribute_parallel_for_simd: 8904 case OMPD_target_update: 8905 case OMPD_declare_simd: 8906 case OMPD_declare_variant: 8907 case OMPD_begin_declare_variant: 8908 case OMPD_end_declare_variant: 8909 case OMPD_declare_target: 8910 case OMPD_end_declare_target: 8911 case OMPD_declare_reduction: 8912 case OMPD_declare_mapper: 8913 case OMPD_taskloop: 8914 case OMPD_taskloop_simd: 8915 case OMPD_master_taskloop: 8916 case OMPD_master_taskloop_simd: 8917 case OMPD_parallel_master_taskloop: 8918 case OMPD_parallel_master_taskloop_simd: 8919 case OMPD_requires: 8920 case OMPD_unknown: 8921 default: 8922 llvm_unreachable("Unexpected directive."); 8923 } 8924 } 8925 8926 return nullptr; 8927 } 8928 8929 /// Emit the user-defined mapper function. The code generation follows the 8930 /// pattern in the example below. 8931 /// \code 8932 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8933 /// void *base, void *begin, 8934 /// int64_t size, int64_t type) { 8935 /// // Allocate space for an array section first. 8936 /// if (size > 1 && !maptype.IsDelete) 8937 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8938 /// size*sizeof(Ty), clearToFrom(type)); 8939 /// // Map members. 
8940 /// for (unsigned i = 0; i < size; i++) { 8941 /// // For each component specified by this mapper: 8942 /// for (auto c : all_components) { 8943 /// if (c.hasMapper()) 8944 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8945 /// c.arg_type); 8946 /// else 8947 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8948 /// c.arg_begin, c.arg_size, c.arg_type); 8949 /// } 8950 /// } 8951 /// // Delete the array section. 8952 /// if (size > 1 && maptype.IsDelete) 8953 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8954 /// size*sizeof(Ty), clearToFrom(type)); 8955 /// } 8956 /// \endcode 8957 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8958 CodeGenFunction *CGF) { 8959 if (UDMMap.count(D) > 0) 8960 return; 8961 ASTContext &C = CGM.getContext(); 8962 QualType Ty = D->getType(); 8963 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8964 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8965 auto *MapperVarDecl = 8966 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8967 SourceLocation Loc = D->getLocation(); 8968 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8969 8970 // Prepare mapper function arguments and attributes. 
8971 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8972 C.VoidPtrTy, ImplicitParamDecl::Other); 8973 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8974 ImplicitParamDecl::Other); 8975 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8976 C.VoidPtrTy, ImplicitParamDecl::Other); 8977 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8978 ImplicitParamDecl::Other); 8979 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8980 ImplicitParamDecl::Other); 8981 FunctionArgList Args; 8982 Args.push_back(&HandleArg); 8983 Args.push_back(&BaseArg); 8984 Args.push_back(&BeginArg); 8985 Args.push_back(&SizeArg); 8986 Args.push_back(&TypeArg); 8987 const CGFunctionInfo &FnInfo = 8988 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8989 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8990 SmallString<64> TyStr; 8991 llvm::raw_svector_ostream Out(TyStr); 8992 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8993 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8994 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8995 Name, &CGM.getModule()); 8996 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8997 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8998 // Start the mapper function code generation. 8999 CodeGenFunction MapperCGF(CGM); 9000 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9001 // Compute the starting and end addreses of array elements. 
9002 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9003 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9004 C.getPointerType(Int64Ty), Loc); 9005 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9006 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9007 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9008 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9009 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9010 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9011 C.getPointerType(Int64Ty), Loc); 9012 // Prepare common arguments for array initiation and deletion. 9013 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9014 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9015 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9016 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9017 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9018 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9019 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9020 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9021 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9022 9023 // Emit array initiation if this is an array section and \p MapType indicates 9024 // that memory allocation is required. 9025 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9026 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9027 ElementSize, HeadBB, /*IsInit=*/true); 9028 9029 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9030 9031 // Emit the loop header block. 9032 MapperCGF.EmitBlock(HeadBB); 9033 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9034 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9035 // Evaluate whether the initial condition is satisfied. 
9036 llvm::Value *IsEmpty = 9037 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9038 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9039 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9040 9041 // Emit the loop body block. 9042 MapperCGF.EmitBlock(BodyBB); 9043 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9044 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9045 PtrPHI->addIncoming(PtrBegin, EntryBB); 9046 Address PtrCurrent = 9047 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9048 .getAlignment() 9049 .alignmentOfArrayElement(ElementSize)); 9050 // Privatize the declared variable of mapper to be the current array element. 9051 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9052 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9053 return MapperCGF 9054 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9055 .getAddress(MapperCGF); 9056 }); 9057 (void)Scope.Privatize(); 9058 9059 // Get map clause information. Fill up the arrays with all mapped variables. 9060 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9061 MappableExprsHandler::MapValuesArrayTy Pointers; 9062 MappableExprsHandler::MapValuesArrayTy Sizes; 9063 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9064 MappableExprsHandler MEHandler(*D, MapperCGF); 9065 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9066 9067 // Call the runtime API __tgt_mapper_num_components to get the number of 9068 // pre-existing components. 
9069 llvm::Value *OffloadingArgs[] = {Handle}; 9070 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9071 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9072 CGM.getModule(), OMPRTL___tgt_mapper_num_components), 9073 OffloadingArgs); 9074 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9075 PreviousSize, 9076 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9077 9078 // Fill up the runtime mapper handle for all components. 9079 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9080 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9081 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9082 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9083 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9084 llvm::Value *CurSizeArg = Sizes[I]; 9085 9086 // Extract the MEMBER_OF field from the map type. 9087 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9088 MapperCGF.EmitBlock(MemberBB); 9089 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9090 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9091 OriMapType, 9092 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9093 llvm::BasicBlock *MemberCombineBB = 9094 MapperCGF.createBasicBlock("omp.member.combine"); 9095 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9096 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9097 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9098 // Add the number of pre-existing components to the MEMBER_OF field if it 9099 // is valid. 9100 MapperCGF.EmitBlock(MemberCombineBB); 9101 llvm::Value *CombinedMember = 9102 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9103 // Do nothing if it is not a member of previous components. 
9104 MapperCGF.EmitBlock(TypeBB); 9105 llvm::PHINode *MemberMapType = 9106 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9107 MemberMapType->addIncoming(OriMapType, MemberBB); 9108 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9109 9110 // Combine the map type inherited from user-defined mapper with that 9111 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9112 // bits of the \a MapType, which is the input argument of the mapper 9113 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9114 // bits of MemberMapType. 9115 // [OpenMP 5.0], 1.2.6. map-type decay. 9116 // | alloc | to | from | tofrom | release | delete 9117 // ---------------------------------------------------------- 9118 // alloc | alloc | alloc | alloc | alloc | release | delete 9119 // to | alloc | to | alloc | to | release | delete 9120 // from | alloc | alloc | from | from | release | delete 9121 // tofrom | alloc | to | from | tofrom | release | delete 9122 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9123 MapType, 9124 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9125 MappableExprsHandler::OMP_MAP_FROM)); 9126 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9127 llvm::BasicBlock *AllocElseBB = 9128 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9129 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9130 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9131 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9132 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9133 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9134 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9135 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
9136 MapperCGF.EmitBlock(AllocBB); 9137 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9138 MemberMapType, 9139 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9140 MappableExprsHandler::OMP_MAP_FROM))); 9141 MapperCGF.Builder.CreateBr(EndBB); 9142 MapperCGF.EmitBlock(AllocElseBB); 9143 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9144 LeftToFrom, 9145 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9146 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9147 // In case of to, clear OMP_MAP_FROM. 9148 MapperCGF.EmitBlock(ToBB); 9149 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9150 MemberMapType, 9151 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9152 MapperCGF.Builder.CreateBr(EndBB); 9153 MapperCGF.EmitBlock(ToElseBB); 9154 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9155 LeftToFrom, 9156 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9157 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9158 // In case of from, clear OMP_MAP_TO. 9159 MapperCGF.EmitBlock(FromBB); 9160 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9161 MemberMapType, 9162 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9163 // In case of tofrom, do nothing. 9164 MapperCGF.EmitBlock(EndBB); 9165 llvm::PHINode *CurMapType = 9166 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9167 CurMapType->addIncoming(AllocMapType, AllocBB); 9168 CurMapType->addIncoming(ToMapType, ToBB); 9169 CurMapType->addIncoming(FromMapType, FromBB); 9170 CurMapType->addIncoming(MemberMapType, ToElseBB); 9171 9172 // TODO: call the corresponding mapper function if a user-defined mapper is 9173 // associated with this map clause. 9174 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9175 // data structure. 
9176 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9177 CurSizeArg, CurMapType}; 9178 MapperCGF.EmitRuntimeCall( 9179 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9180 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9181 OffloadingArgs); 9182 } 9183 9184 // Update the pointer to point to the next element that needs to be mapped, 9185 // and check whether we have mapped all elements. 9186 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9187 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9188 PtrPHI->addIncoming(PtrNext, BodyBB); 9189 llvm::Value *IsDone = 9190 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9191 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9192 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9193 9194 MapperCGF.EmitBlock(ExitBB); 9195 // Emit array deletion if this is an array section and \p MapType indicates 9196 // that deletion is required. 9197 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9198 ElementSize, DoneBB, /*IsInit=*/false); 9199 9200 // Emit the function exit block. 9201 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9202 MapperCGF.FinishFunction(); 9203 UDMMap.try_emplace(D, Fn); 9204 if (CGF) { 9205 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9206 Decls.second.push_back(D); 9207 } 9208 } 9209 9210 /// Emit the array initialization or deletion portion for user-defined mapper 9211 /// code generation. First, it evaluates whether an array section is mapped and 9212 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9213 /// true, and \a MapType indicates to not delete this array, array 9214 /// initialization code is generated. If \a IsInit is false, and \a MapType 9215 /// indicates to not this array, array deletion code is generated. 
9216 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9217 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9218 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9219 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9220 StringRef Prefix = IsInit ? ".init" : ".del"; 9221 9222 // Evaluate if this is an array section. 9223 llvm::BasicBlock *IsDeleteBB = 9224 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9225 llvm::BasicBlock *BodyBB = 9226 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9227 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9228 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9229 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9230 9231 // Evaluate if we are going to delete this section. 9232 MapperCGF.EmitBlock(IsDeleteBB); 9233 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9234 MapType, 9235 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9236 llvm::Value *DeleteCond; 9237 if (IsInit) { 9238 DeleteCond = MapperCGF.Builder.CreateIsNull( 9239 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9240 } else { 9241 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9242 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9243 } 9244 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9245 9246 MapperCGF.EmitBlock(BodyBB); 9247 // Get the array size by multiplying element size and element number (i.e., \p 9248 // Size). 9249 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9250 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9251 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9252 // memory allocation/deletion purpose only. 
9253 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9254 MapType, 9255 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9256 MappableExprsHandler::OMP_MAP_FROM))); 9257 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9258 // data structure. 9259 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9260 MapperCGF.EmitRuntimeCall( 9261 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9262 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9263 OffloadingArgs); 9264 } 9265 9266 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9267 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9268 llvm::Value *DeviceID, 9269 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9270 const OMPLoopDirective &D)> 9271 SizeEmitter) { 9272 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9273 const OMPExecutableDirective *TD = &D; 9274 // Get nested teams distribute kind directive, if any. 9275 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9276 TD = getNestedDistributeDirective(CGM.getContext(), D); 9277 if (!TD) 9278 return; 9279 const auto *LD = cast<OMPLoopDirective>(TD); 9280 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9281 PrePostActionTy &) { 9282 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9283 llvm::Value *Args[] = {DeviceID, NumIterations}; 9284 CGF.EmitRuntimeCall( 9285 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9286 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9287 Args); 9288 } 9289 }; 9290 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9291 } 9292 9293 void CGOpenMPRuntime::emitTargetCall( 9294 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9295 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9296 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9297 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9298 const OMPLoopDirective &D)> 
9299 SizeEmitter) { 9300 if (!CGF.HaveInsertPoint()) 9301 return; 9302 9303 assert(OutlinedFn && "Invalid outlined function!"); 9304 9305 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9306 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9307 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9308 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9309 PrePostActionTy &) { 9310 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9311 }; 9312 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9313 9314 CodeGenFunction::OMPTargetDataInfo InputInfo; 9315 llvm::Value *MapTypesArray = nullptr; 9316 // Fill up the pointer arrays and transfer execution to the device. 9317 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9318 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9319 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9320 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9321 // Reverse offloading is not supported, so just execute on the host. 9322 if (RequiresOuterTask) { 9323 CapturedVars.clear(); 9324 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9325 } 9326 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9327 return; 9328 } 9329 9330 // On top of the arrays that were filled up, the target offloading call 9331 // takes as arguments the device id as well as the host pointer. The host 9332 // pointer is used by the runtime library to identify the current target 9333 // region, so it only has to be unique and not necessarily point to 9334 // anything. It could be the pointer to the outlined function that 9335 // implements the target region, but we aren't using that so that the 9336 // compiler doesn't need to keep that, and could therefore inline the host 9337 // function if proven worthwhile during optimization. 9338 9339 // From this point on, we need to have an ID of the target region defined. 
9340 assert(OutlinedFnID && "Invalid outlined function ID!"); 9341 9342 // Emit device ID if any. 9343 llvm::Value *DeviceID; 9344 if (Device.getPointer()) { 9345 assert((Device.getInt() == OMPC_DEVICE_unknown || 9346 Device.getInt() == OMPC_DEVICE_device_num) && 9347 "Expected device_num modifier."); 9348 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9349 DeviceID = 9350 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9351 } else { 9352 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9353 } 9354 9355 // Emit the number of elements in the offloading arrays. 9356 llvm::Value *PointerNum = 9357 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9358 9359 // Return value of the runtime offloading call. 9360 llvm::Value *Return; 9361 9362 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9363 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9364 9365 // Emit tripcount for the target loop-based directive. 9366 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9367 9368 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9369 // The target region is an outlined function launched by the runtime 9370 // via calls __tgt_target() or __tgt_target_teams(). 9371 // 9372 // __tgt_target() launches a target region with one team and one thread, 9373 // executing a serial region. This master thread may in turn launch 9374 // more threads within its team upon encountering a parallel region, 9375 // however, no additional teams can be launched on the device. 9376 // 9377 // __tgt_target_teams() launches a target region with one or more teams, 9378 // each with one or more threads. This call is required for target 9379 // constructs such as: 9380 // 'target teams' 9381 // 'target' / 'teams' 9382 // 'target teams distribute parallel for' 9383 // 'target parallel' 9384 // and so on. 
9385 // 9386 // Note that on the host and CPU targets, the runtime implementation of 9387 // these calls simply call the outlined function without forking threads. 9388 // The outlined functions themselves have runtime calls to 9389 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9390 // the compiler in emitTeamsCall() and emitParallelCall(). 9391 // 9392 // In contrast, on the NVPTX target, the implementation of 9393 // __tgt_target_teams() launches a GPU kernel with the requested number 9394 // of teams and threads so no additional calls to the runtime are required. 9395 if (NumTeams) { 9396 // If we have NumTeams defined this means that we have an enclosed teams 9397 // region. Therefore we also expect to have NumThreads defined. These two 9398 // values should be defined in the presence of a teams directive, 9399 // regardless of having any clauses associated. If the user is using teams 9400 // but no clauses, these two values will be the default that should be 9401 // passed to the runtime library - a 32-bit integer with the value zero. 9402 assert(NumThreads && "Thread limit expression should be available along " 9403 "with number of teams."); 9404 llvm::Value *OffloadingArgs[] = {DeviceID, 9405 OutlinedFnID, 9406 PointerNum, 9407 InputInfo.BasePointersArray.getPointer(), 9408 InputInfo.PointersArray.getPointer(), 9409 InputInfo.SizesArray.getPointer(), 9410 MapTypesArray, 9411 NumTeams, 9412 NumThreads}; 9413 Return = CGF.EmitRuntimeCall( 9414 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9415 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_teams_nowait 9416 : OMPRTL___tgt_target_teams), 9417 OffloadingArgs); 9418 } else { 9419 llvm::Value *OffloadingArgs[] = {DeviceID, 9420 OutlinedFnID, 9421 PointerNum, 9422 InputInfo.BasePointersArray.getPointer(), 9423 InputInfo.PointersArray.getPointer(), 9424 InputInfo.SizesArray.getPointer(), 9425 MapTypesArray}; 9426 Return = CGF.EmitRuntimeCall( 9427 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9428 CGM.getModule(), 9429 HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target), 9430 OffloadingArgs); 9431 } 9432 9433 // Check the error code and execute the host version if required. 9434 llvm::BasicBlock *OffloadFailedBlock = 9435 CGF.createBasicBlock("omp_offload.failed"); 9436 llvm::BasicBlock *OffloadContBlock = 9437 CGF.createBasicBlock("omp_offload.cont"); 9438 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9439 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9440 9441 CGF.EmitBlock(OffloadFailedBlock); 9442 if (RequiresOuterTask) { 9443 CapturedVars.clear(); 9444 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9445 } 9446 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9447 CGF.EmitBranch(OffloadContBlock); 9448 9449 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9450 }; 9451 9452 // Notify that the host version must be executed. 9453 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9454 RequiresOuterTask](CodeGenFunction &CGF, 9455 PrePostActionTy &) { 9456 if (RequiresOuterTask) { 9457 CapturedVars.clear(); 9458 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9459 } 9460 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9461 }; 9462 9463 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9464 &CapturedVars, RequiresOuterTask, 9465 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9466 // Fill up the arrays with all the captured variables. 
9467 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9468 MappableExprsHandler::MapValuesArrayTy Pointers; 9469 MappableExprsHandler::MapValuesArrayTy Sizes; 9470 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9471 9472 // Get mappable expression information. 9473 MappableExprsHandler MEHandler(D, CGF); 9474 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9475 9476 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9477 auto CV = CapturedVars.begin(); 9478 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9479 CE = CS.capture_end(); 9480 CI != CE; ++CI, ++RI, ++CV) { 9481 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9482 MappableExprsHandler::MapValuesArrayTy CurPointers; 9483 MappableExprsHandler::MapValuesArrayTy CurSizes; 9484 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9485 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9486 9487 // VLA sizes are passed to the outlined region by copy and do not have map 9488 // information associated. 9489 if (CI->capturesVariableArrayType()) { 9490 CurBasePointers.push_back(*CV); 9491 CurPointers.push_back(*CV); 9492 CurSizes.push_back(CGF.Builder.CreateIntCast( 9493 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9494 // Copy to the device as an argument. No need to retrieve it. 9495 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9496 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9497 MappableExprsHandler::OMP_MAP_IMPLICIT); 9498 } else { 9499 // If we have any information in the map clause, we use it, otherwise we 9500 // just do a default mapping. 9501 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9502 CurSizes, CurMapTypes, PartialStruct); 9503 if (CurBasePointers.empty()) 9504 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9505 CurPointers, CurSizes, CurMapTypes); 9506 // Generate correct mapping for variables captured by reference in 9507 // lambdas. 
9508 if (CI->capturesVariable()) 9509 MEHandler.generateInfoForLambdaCaptures( 9510 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9511 CurMapTypes, LambdaPointers); 9512 } 9513 // We expect to have at least an element of information for this capture. 9514 assert(!CurBasePointers.empty() && 9515 "Non-existing map pointer for capture!"); 9516 assert(CurBasePointers.size() == CurPointers.size() && 9517 CurBasePointers.size() == CurSizes.size() && 9518 CurBasePointers.size() == CurMapTypes.size() && 9519 "Inconsistent map information sizes!"); 9520 9521 // If there is an entry in PartialStruct it means we have a struct with 9522 // individual members mapped. Emit an extra combined entry. 9523 if (PartialStruct.Base.isValid()) 9524 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9525 CurMapTypes, PartialStruct); 9526 9527 // We need to append the results of this capture to what we already have. 9528 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9529 Pointers.append(CurPointers.begin(), CurPointers.end()); 9530 Sizes.append(CurSizes.begin(), CurSizes.end()); 9531 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9532 } 9533 // Adjust MEMBER_OF flags for the lambdas captures. 9534 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9535 Pointers, MapTypes); 9536 // Map other list items in the map clause which are not captured variables 9537 // but "declare target link" global variables. 9538 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9539 MapTypes); 9540 9541 TargetDataInfo Info; 9542 // Fill up the arrays and create the arguments. 
9543 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9544 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9545 Info.PointersArray, Info.SizesArray, 9546 Info.MapTypesArray, Info); 9547 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9548 InputInfo.BasePointersArray = 9549 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9550 InputInfo.PointersArray = 9551 Address(Info.PointersArray, CGM.getPointerAlign()); 9552 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9553 MapTypesArray = Info.MapTypesArray; 9554 if (RequiresOuterTask) 9555 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9556 else 9557 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9558 }; 9559 9560 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9561 CodeGenFunction &CGF, PrePostActionTy &) { 9562 if (RequiresOuterTask) { 9563 CodeGenFunction::OMPTargetDataInfo InputInfo; 9564 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9565 } else { 9566 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9567 } 9568 }; 9569 9570 // If we have a target function ID it means that we need to support 9571 // offloading, otherwise, just execute on the host. We need to execute on host 9572 // regardless of the conditional in the if clause if, e.g., the user do not 9573 // specify target triples. 9574 if (OutlinedFnID) { 9575 if (IfCond) { 9576 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9577 } else { 9578 RegionCodeGenTy ThenRCG(TargetThenGen); 9579 ThenRCG(CGF); 9580 } 9581 } else { 9582 RegionCodeGenTy ElseRCG(TargetElseGen); 9583 ElseRCG(CGF); 9584 } 9585 } 9586 9587 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9588 StringRef ParentName) { 9589 if (!S) 9590 return; 9591 9592 // Codegen OMP target directives that offload compute to the device. 
9593 bool RequiresDeviceCodegen = 9594 isa<OMPExecutableDirective>(S) && 9595 isOpenMPTargetExecutionDirective( 9596 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9597 9598 if (RequiresDeviceCodegen) { 9599 const auto &E = *cast<OMPExecutableDirective>(S); 9600 unsigned DeviceID; 9601 unsigned FileID; 9602 unsigned Line; 9603 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9604 FileID, Line); 9605 9606 // Is this a target region that should not be emitted as an entry point? If 9607 // so just signal we are done with this target region. 9608 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9609 ParentName, Line)) 9610 return; 9611 9612 switch (E.getDirectiveKind()) { 9613 case OMPD_target: 9614 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9615 cast<OMPTargetDirective>(E)); 9616 break; 9617 case OMPD_target_parallel: 9618 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9619 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9620 break; 9621 case OMPD_target_teams: 9622 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9623 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9624 break; 9625 case OMPD_target_teams_distribute: 9626 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9627 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9628 break; 9629 case OMPD_target_teams_distribute_simd: 9630 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9631 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9632 break; 9633 case OMPD_target_parallel_for: 9634 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9635 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9636 break; 9637 case OMPD_target_parallel_for_simd: 9638 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9639 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9640 break; 9641 case OMPD_target_simd: 9642 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9643 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9644 break; 9645 case OMPD_target_teams_distribute_parallel_for: 9646 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9647 CGM, ParentName, 9648 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9649 break; 9650 case OMPD_target_teams_distribute_parallel_for_simd: 9651 CodeGenFunction:: 9652 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9653 CGM, ParentName, 9654 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9655 break; 9656 case OMPD_parallel: 9657 case OMPD_for: 9658 case OMPD_parallel_for: 9659 case OMPD_parallel_master: 9660 case OMPD_parallel_sections: 9661 case OMPD_for_simd: 9662 case OMPD_parallel_for_simd: 9663 case OMPD_cancel: 9664 case OMPD_cancellation_point: 9665 case OMPD_ordered: 9666 case OMPD_threadprivate: 9667 case OMPD_allocate: 9668 case OMPD_task: 9669 case OMPD_simd: 9670 case OMPD_sections: 9671 case OMPD_section: 9672 case OMPD_single: 9673 case OMPD_master: 9674 case OMPD_critical: 9675 case OMPD_taskyield: 9676 case OMPD_barrier: 9677 case OMPD_taskwait: 9678 case OMPD_taskgroup: 9679 case OMPD_atomic: 9680 case OMPD_flush: 9681 case OMPD_depobj: 9682 case OMPD_scan: 9683 case OMPD_teams: 9684 case OMPD_target_data: 9685 case OMPD_target_exit_data: 9686 case OMPD_target_enter_data: 9687 case OMPD_distribute: 9688 case OMPD_distribute_simd: 9689 case OMPD_distribute_parallel_for: 9690 case OMPD_distribute_parallel_for_simd: 9691 case OMPD_teams_distribute: 9692 case OMPD_teams_distribute_simd: 9693 case OMPD_teams_distribute_parallel_for: 9694 case OMPD_teams_distribute_parallel_for_simd: 9695 case OMPD_target_update: 9696 case OMPD_declare_simd: 9697 case OMPD_declare_variant: 9698 case OMPD_begin_declare_variant: 9699 case OMPD_end_declare_variant: 9700 case OMPD_declare_target: 9701 case OMPD_end_declare_target: 9702 case OMPD_declare_reduction: 9703 case OMPD_declare_mapper: 
9704 case OMPD_taskloop: 9705 case OMPD_taskloop_simd: 9706 case OMPD_master_taskloop: 9707 case OMPD_master_taskloop_simd: 9708 case OMPD_parallel_master_taskloop: 9709 case OMPD_parallel_master_taskloop_simd: 9710 case OMPD_requires: 9711 case OMPD_unknown: 9712 default: 9713 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9714 } 9715 return; 9716 } 9717 9718 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9719 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9720 return; 9721 9722 scanForTargetRegionsFunctions( 9723 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9724 return; 9725 } 9726 9727 // If this is a lambda function, look into its body. 9728 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9729 S = L->getBody(); 9730 9731 // Keep looking for target regions recursively. 9732 for (const Stmt *II : S->children()) 9733 scanForTargetRegionsFunctions(II, ParentName); 9734 } 9735 9736 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9737 // If emitting code for the host, we do not process FD here. Instead we do 9738 // the normal code generation. 9739 if (!CGM.getLangOpts().OpenMPIsDevice) { 9740 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9741 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9742 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9743 // Do not emit device_type(nohost) functions for the host. 9744 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9745 return true; 9746 } 9747 return false; 9748 } 9749 9750 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9751 // Try to detect target regions in the function. 9752 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9753 StringRef Name = CGM.getMangledName(GD); 9754 scanForTargetRegionsFunctions(FD->getBody(), Name); 9755 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9756 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9757 // Do not emit device_type(nohost) functions for the host. 
9758 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9759 return true; 9760 } 9761 9762 // Do not to emit function if it is not marked as declare target. 9763 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9764 AlreadyEmittedTargetDecls.count(VD) == 0; 9765 } 9766 9767 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9768 if (!CGM.getLangOpts().OpenMPIsDevice) 9769 return false; 9770 9771 // Check if there are Ctors/Dtors in this declaration and look for target 9772 // regions in it. We use the complete variant to produce the kernel name 9773 // mangling. 9774 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9775 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9776 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9777 StringRef ParentName = 9778 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9779 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9780 } 9781 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9782 StringRef ParentName = 9783 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9784 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9785 } 9786 } 9787 9788 // Do not to emit variable if it is not marked as declare target. 
9789 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9790 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9791 cast<VarDecl>(GD.getDecl())); 9792 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9793 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9794 HasRequiresUnifiedSharedMemory)) { 9795 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9796 return true; 9797 } 9798 return false; 9799 } 9800 9801 llvm::Constant * 9802 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9803 const VarDecl *VD) { 9804 assert(VD->getType().isConstant(CGM.getContext()) && 9805 "Expected constant variable."); 9806 StringRef VarName; 9807 llvm::Constant *Addr; 9808 llvm::GlobalValue::LinkageTypes Linkage; 9809 QualType Ty = VD->getType(); 9810 SmallString<128> Buffer; 9811 { 9812 unsigned DeviceID; 9813 unsigned FileID; 9814 unsigned Line; 9815 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9816 FileID, Line); 9817 llvm::raw_svector_ostream OS(Buffer); 9818 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9819 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9820 VarName = OS.str(); 9821 } 9822 Linkage = llvm::GlobalValue::InternalLinkage; 9823 Addr = 9824 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9825 getDefaultFirstprivateAddressSpace()); 9826 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9827 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9828 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9829 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9830 VarName, Addr, VarSize, 9831 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9832 return Addr; 9833 } 9834 9835 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9836 llvm::Constant *Addr) { 9837 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9838 !CGM.getLangOpts().OpenMPIsDevice) 9839 return; 9840 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9841 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9842 if (!Res) { 9843 if (CGM.getLangOpts().OpenMPIsDevice) { 9844 // Register non-target variables being emitted in device code (debug info 9845 // may cause this). 9846 StringRef VarName = CGM.getMangledName(VD); 9847 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9848 } 9849 return; 9850 } 9851 // Register declare target variables. 9852 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9853 StringRef VarName; 9854 CharUnits VarSize; 9855 llvm::GlobalValue::LinkageTypes Linkage; 9856 9857 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9858 !HasRequiresUnifiedSharedMemory) { 9859 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9860 VarName = CGM.getMangledName(VD); 9861 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9862 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9863 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9864 } else { 9865 VarSize = CharUnits::Zero(); 9866 } 9867 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9868 // Temp solution to prevent optimizations of the internal variables. 
9869 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9870 std::string RefName = getName({VarName, "ref"}); 9871 if (!CGM.GetGlobalValue(RefName)) { 9872 llvm::Constant *AddrRef = 9873 getOrCreateInternalVariable(Addr->getType(), RefName); 9874 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9875 GVAddrRef->setConstant(/*Val=*/true); 9876 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9877 GVAddrRef->setInitializer(Addr); 9878 CGM.addCompilerUsedGlobal(GVAddrRef); 9879 } 9880 } 9881 } else { 9882 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9884 HasRequiresUnifiedSharedMemory)) && 9885 "Declare target attribute must link or to with unified memory."); 9886 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9887 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9888 else 9889 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9890 9891 if (CGM.getLangOpts().OpenMPIsDevice) { 9892 VarName = Addr->getName(); 9893 Addr = nullptr; 9894 } else { 9895 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9896 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9897 } 9898 VarSize = CGM.getPointerSize(); 9899 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9900 } 9901 9902 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9903 VarName, Addr, VarSize, Flags, Linkage); 9904 } 9905 9906 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9907 if (isa<FunctionDecl>(GD.getDecl()) || 9908 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9909 return emitTargetFunctions(GD); 9910 9911 return emitTargetGlobalVariable(GD); 9912 } 9913 9914 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9915 for (const VarDecl *VD : DeferredGlobalVariables) { 9916 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9917 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9918 if (!Res) 9919 continue; 9920 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9921 !HasRequiresUnifiedSharedMemory) { 9922 CGM.EmitGlobal(VD); 9923 } else { 9924 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9925 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9926 HasRequiresUnifiedSharedMemory)) && 9927 "Expected link clause or to clause with unified memory."); 9928 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9929 } 9930 } 9931 } 9932 9933 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9934 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9935 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9936 " Expected target-based directive."); 9937 } 9938 9939 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9940 for (const OMPClause *Clause : D->clauselists()) { 9941 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9942 HasRequiresUnifiedSharedMemory = true; 9943 } else if (const auto *AC = 9944 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9945 switch (AC->getAtomicDefaultMemOrderKind()) { 9946 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9947 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9948 break; 9949 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9950 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9951 break; 9952 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9953 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9954 break; 9955 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 9956 break; 9957 } 9958 } 9959 } 9960 } 9961 9962 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 9963 return RequiresAtomicOrdering; 9964 } 9965 9966 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9967 LangAS &AS) { 9968 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9969 return false; 9970 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9971 switch(A->getAllocatorType()) { 9972 case OMPAllocateDeclAttr::OMPNullMemAlloc: 9973 case 
OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9974 // Not supported, fallback to the default mem space. 9975 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9976 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9977 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9978 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9979 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9980 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9981 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9982 AS = LangAS::Default; 9983 return true; 9984 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9985 llvm_unreachable("Expected predefined allocator for the variables with the " 9986 "static storage."); 9987 } 9988 return false; 9989 } 9990 9991 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9992 return HasRequiresUnifiedSharedMemory; 9993 } 9994 9995 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9996 CodeGenModule &CGM) 9997 : CGM(CGM) { 9998 if (CGM.getLangOpts().OpenMPIsDevice) { 9999 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10000 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10001 } 10002 } 10003 10004 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10005 if (CGM.getLangOpts().OpenMPIsDevice) 10006 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10007 } 10008 10009 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10010 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10011 return true; 10012 10013 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10014 // Do not to emit function if it is marked as declare target as it was already 10015 // emitted. 
10016 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10017 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10018 if (auto *F = dyn_cast_or_null<llvm::Function>( 10019 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10020 return !F->isDeclaration(); 10021 return false; 10022 } 10023 return true; 10024 } 10025 10026 return !AlreadyEmittedTargetDecls.insert(D).second; 10027 } 10028 10029 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10030 // If we don't have entries or if we are emitting code for the device, we 10031 // don't need to do anything. 10032 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10033 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10034 (OffloadEntriesInfoManager.empty() && 10035 !HasEmittedDeclareTargetRegion && 10036 !HasEmittedTargetRegion)) 10037 return nullptr; 10038 10039 // Create and register the function that handles the requires directives. 10040 ASTContext &C = CGM.getContext(); 10041 10042 llvm::Function *RequiresRegFn; 10043 { 10044 CodeGenFunction CGF(CGM); 10045 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10046 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10047 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10048 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10049 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10050 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10051 // TODO: check for other requires clauses. 10052 // The requires directive takes effect only when a target region is 10053 // present in the compilation unit. Otherwise it is ignored and not 10054 // passed to the runtime. This avoids the runtime from throwing an error 10055 // for mismatching requires clauses across compilation units that don't 10056 // contain at least 1 target region. 
10057 assert((HasEmittedTargetRegion || 10058 HasEmittedDeclareTargetRegion || 10059 !OffloadEntriesInfoManager.empty()) && 10060 "Target or declare target region expected."); 10061 if (HasRequiresUnifiedSharedMemory) 10062 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10063 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10064 CGM.getModule(), OMPRTL___tgt_register_requires), 10065 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10066 CGF.FinishFunction(); 10067 } 10068 return RequiresRegFn; 10069 } 10070 10071 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10072 const OMPExecutableDirective &D, 10073 SourceLocation Loc, 10074 llvm::Function *OutlinedFn, 10075 ArrayRef<llvm::Value *> CapturedVars) { 10076 if (!CGF.HaveInsertPoint()) 10077 return; 10078 10079 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10080 CodeGenFunction::RunCleanupsScope Scope(CGF); 10081 10082 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10083 llvm::Value *Args[] = { 10084 RTLoc, 10085 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10086 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10087 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10088 RealArgs.append(std::begin(Args), std::end(Args)); 10089 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10090 10091 llvm::FunctionCallee RTLFn = 10092 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10093 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10094 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10095 } 10096 10097 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10098 const Expr *NumTeams, 10099 const Expr *ThreadLimit, 10100 SourceLocation Loc) { 10101 if (!CGF.HaveInsertPoint()) 10102 return; 10103 10104 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10105 10106 llvm::Value *NumTeamsVal = 10107 NumTeams 10108 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10109 CGF.CGM.Int32Ty, /* isSigned = */ true) 10110 : CGF.Builder.getInt32(0); 10111 10112 llvm::Value *ThreadLimitVal = 10113 ThreadLimit 10114 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10115 CGF.CGM.Int32Ty, /* isSigned = */ true) 10116 : CGF.Builder.getInt32(0); 10117 10118 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10119 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10120 ThreadLimitVal}; 10121 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10122 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10123 PushNumTeamsArgs); 10124 } 10125 10126 void CGOpenMPRuntime::emitTargetDataCalls( 10127 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10128 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10129 if (!CGF.HaveInsertPoint()) 10130 return; 10131 10132 // Action used to replace the default codegen action and turn privatization 10133 // off. 10134 PrePostActionTy NoPrivAction; 10135 10136 // Generate the code for the opening of the data environment. Capture all the 10137 // arguments of the runtime call by reference because they are used in the 10138 // closing of the region. 10139 auto &&BeginThenGen = [this, &D, Device, &Info, 10140 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10141 // Fill up the arrays with all the mapped variables. 10142 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10143 MappableExprsHandler::MapValuesArrayTy Pointers; 10144 MappableExprsHandler::MapValuesArrayTy Sizes; 10145 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10146 10147 // Get map clause information. 10148 MappableExprsHandler MCHandler(D, CGF); 10149 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10150 10151 // Fill up the arrays and create the arguments. 
10152 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10153 10154 llvm::Value *BasePointersArrayArg = nullptr; 10155 llvm::Value *PointersArrayArg = nullptr; 10156 llvm::Value *SizesArrayArg = nullptr; 10157 llvm::Value *MapTypesArrayArg = nullptr; 10158 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10159 SizesArrayArg, MapTypesArrayArg, Info); 10160 10161 // Emit device ID if any. 10162 llvm::Value *DeviceID = nullptr; 10163 if (Device) { 10164 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10165 CGF.Int64Ty, /*isSigned=*/true); 10166 } else { 10167 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10168 } 10169 10170 // Emit the number of elements in the offloading arrays. 10171 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10172 10173 llvm::Value *OffloadingArgs[] = { 10174 DeviceID, PointerNum, BasePointersArrayArg, 10175 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10176 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10177 CGM.getModule(), OMPRTL___tgt_target_data_begin), 10178 OffloadingArgs); 10179 10180 // If device pointer privatization is required, emit the body of the region 10181 // here. It will have to be duplicated: with and without privatization. 10182 if (!Info.CaptureDeviceAddrMap.empty()) 10183 CodeGen(CGF); 10184 }; 10185 10186 // Generate code for the closing of the data region. 10187 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10188 PrePostActionTy &) { 10189 assert(Info.isValid() && "Invalid data environment closing arguments."); 10190 10191 llvm::Value *BasePointersArrayArg = nullptr; 10192 llvm::Value *PointersArrayArg = nullptr; 10193 llvm::Value *SizesArrayArg = nullptr; 10194 llvm::Value *MapTypesArrayArg = nullptr; 10195 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10196 SizesArrayArg, MapTypesArrayArg, Info); 10197 10198 // Emit device ID if any. 
10199 llvm::Value *DeviceID = nullptr; 10200 if (Device) { 10201 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10202 CGF.Int64Ty, /*isSigned=*/true); 10203 } else { 10204 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10205 } 10206 10207 // Emit the number of elements in the offloading arrays. 10208 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10209 10210 llvm::Value *OffloadingArgs[] = { 10211 DeviceID, PointerNum, BasePointersArrayArg, 10212 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10213 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10214 CGM.getModule(), OMPRTL___tgt_target_data_end), 10215 OffloadingArgs); 10216 }; 10217 10218 // If we need device pointer privatization, we need to emit the body of the 10219 // region with no privatization in the 'else' branch of the conditional. 10220 // Otherwise, we don't have to do anything. 10221 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10222 PrePostActionTy &) { 10223 if (!Info.CaptureDeviceAddrMap.empty()) { 10224 CodeGen.setAction(NoPrivAction); 10225 CodeGen(CGF); 10226 } 10227 }; 10228 10229 // We don't have to do anything to close the region if the if clause evaluates 10230 // to false. 10231 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10232 10233 if (IfCond) { 10234 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10235 } else { 10236 RegionCodeGenTy RCG(BeginThenGen); 10237 RCG(CGF); 10238 } 10239 10240 // If we don't require privatization of device pointers, we emit the body in 10241 // between the runtime calls. This avoids duplicating the body code. 
10242 if (Info.CaptureDeviceAddrMap.empty()) { 10243 CodeGen.setAction(NoPrivAction); 10244 CodeGen(CGF); 10245 } 10246 10247 if (IfCond) { 10248 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10249 } else { 10250 RegionCodeGenTy RCG(EndThenGen); 10251 RCG(CGF); 10252 } 10253 } 10254 10255 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10256 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10257 const Expr *Device) { 10258 if (!CGF.HaveInsertPoint()) 10259 return; 10260 10261 assert((isa<OMPTargetEnterDataDirective>(D) || 10262 isa<OMPTargetExitDataDirective>(D) || 10263 isa<OMPTargetUpdateDirective>(D)) && 10264 "Expecting either target enter, exit data, or update directives."); 10265 10266 CodeGenFunction::OMPTargetDataInfo InputInfo; 10267 llvm::Value *MapTypesArray = nullptr; 10268 // Generate the code for the opening of the data environment. 10269 auto &&ThenGen = [this, &D, Device, &InputInfo, 10270 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10271 // Emit device ID if any. 10272 llvm::Value *DeviceID = nullptr; 10273 if (Device) { 10274 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10275 CGF.Int64Ty, /*isSigned=*/true); 10276 } else { 10277 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10278 } 10279 10280 // Emit the number of elements in the offloading arrays. 10281 llvm::Constant *PointerNum = 10282 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10283 10284 llvm::Value *OffloadingArgs[] = {DeviceID, 10285 PointerNum, 10286 InputInfo.BasePointersArray.getPointer(), 10287 InputInfo.PointersArray.getPointer(), 10288 InputInfo.SizesArray.getPointer(), 10289 MapTypesArray}; 10290 10291 // Select the right runtime function call for each expected standalone 10292 // directive. 10293 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10294 RuntimeFunction RTLFn; 10295 switch (D.getDirectiveKind()) { 10296 case OMPD_target_enter_data: 10297 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_begin_nowait 10298 : OMPRTL___tgt_target_data_begin; 10299 break; 10300 case OMPD_target_exit_data: 10301 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait 10302 : OMPRTL___tgt_target_data_end; 10303 break; 10304 case OMPD_target_update: 10305 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait 10306 : OMPRTL___tgt_target_data_update; 10307 break; 10308 case OMPD_parallel: 10309 case OMPD_for: 10310 case OMPD_parallel_for: 10311 case OMPD_parallel_master: 10312 case OMPD_parallel_sections: 10313 case OMPD_for_simd: 10314 case OMPD_parallel_for_simd: 10315 case OMPD_cancel: 10316 case OMPD_cancellation_point: 10317 case OMPD_ordered: 10318 case OMPD_threadprivate: 10319 case OMPD_allocate: 10320 case OMPD_task: 10321 case OMPD_simd: 10322 case OMPD_sections: 10323 case OMPD_section: 10324 case OMPD_single: 10325 case OMPD_master: 10326 case OMPD_critical: 10327 case OMPD_taskyield: 10328 case OMPD_barrier: 10329 case OMPD_taskwait: 10330 case OMPD_taskgroup: 10331 case OMPD_atomic: 10332 case OMPD_flush: 10333 case OMPD_depobj: 10334 case OMPD_scan: 10335 case OMPD_teams: 10336 case OMPD_target_data: 10337 case OMPD_distribute: 10338 case OMPD_distribute_simd: 10339 case OMPD_distribute_parallel_for: 10340 case OMPD_distribute_parallel_for_simd: 10341 case OMPD_teams_distribute: 10342 case OMPD_teams_distribute_simd: 10343 case OMPD_teams_distribute_parallel_for: 10344 case OMPD_teams_distribute_parallel_for_simd: 10345 case OMPD_declare_simd: 10346 case OMPD_declare_variant: 10347 case OMPD_begin_declare_variant: 10348 case OMPD_end_declare_variant: 10349 case OMPD_declare_target: 10350 case OMPD_end_declare_target: 10351 case OMPD_declare_reduction: 10352 case OMPD_declare_mapper: 10353 case OMPD_taskloop: 10354 case OMPD_taskloop_simd: 10355 case OMPD_master_taskloop: 10356 case OMPD_master_taskloop_simd: 10357 case OMPD_parallel_master_taskloop: 10358 case OMPD_parallel_master_taskloop_simd: 10359 case OMPD_target: 10360 
case OMPD_target_simd: 10361 case OMPD_target_teams_distribute: 10362 case OMPD_target_teams_distribute_simd: 10363 case OMPD_target_teams_distribute_parallel_for: 10364 case OMPD_target_teams_distribute_parallel_for_simd: 10365 case OMPD_target_teams: 10366 case OMPD_target_parallel: 10367 case OMPD_target_parallel_for: 10368 case OMPD_target_parallel_for_simd: 10369 case OMPD_requires: 10370 case OMPD_unknown: 10371 default: 10372 llvm_unreachable("Unexpected standalone target data directive."); 10373 break; 10374 } 10375 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 10376 CGM.getModule(), RTLFn), 10377 OffloadingArgs); 10378 }; 10379 10380 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10381 CodeGenFunction &CGF, PrePostActionTy &) { 10382 // Fill up the arrays with all the mapped variables. 10383 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10384 MappableExprsHandler::MapValuesArrayTy Pointers; 10385 MappableExprsHandler::MapValuesArrayTy Sizes; 10386 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10387 10388 // Get map clause information. 10389 MappableExprsHandler MEHandler(D, CGF); 10390 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10391 10392 TargetDataInfo Info; 10393 // Fill up the arrays and create the arguments. 
10394 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10395 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10396 Info.PointersArray, Info.SizesArray, 10397 Info.MapTypesArray, Info); 10398 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10399 InputInfo.BasePointersArray = 10400 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10401 InputInfo.PointersArray = 10402 Address(Info.PointersArray, CGM.getPointerAlign()); 10403 InputInfo.SizesArray = 10404 Address(Info.SizesArray, CGM.getPointerAlign()); 10405 MapTypesArray = Info.MapTypesArray; 10406 if (D.hasClausesOfKind<OMPDependClause>()) 10407 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10408 else 10409 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10410 }; 10411 10412 if (IfCond) { 10413 emitIfClause(CGF, IfCond, TargetThenGen, 10414 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10415 } else { 10416 RegionCodeGenTy ThenRCG(TargetThenGen); 10417 ThenRCG(CGF); 10418 } 10419 } 10420 10421 namespace { 10422 /// Kind of parameter in a function with 'declare simd' directive. 10423 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10424 /// Attribute set of the parameter. 10425 struct ParamAttrTy { 10426 ParamKindTy Kind = Vector; 10427 llvm::APSInt StrideOrArg; 10428 llvm::APSInt Alignment; 10429 }; 10430 } // namespace 10431 10432 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10433 ArrayRef<ParamAttrTy> ParamAttrs) { 10434 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10435 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10436 // of that clause. The VLEN value must be power of 2. 10437 // In other case the notion of the function`s "characteristic data type" (CDT) 10438 // is used to compute the vector length. 10439 // CDT is defined in the following order: 10440 // a) For non-void function, the CDT is the return type. 
10441 // b) If the function has any non-uniform, non-linear parameters, then the 10442 // CDT is the type of the first such parameter. 10443 // c) If the CDT determined by a) or b) above is struct, union, or class 10444 // type which is pass-by-value (except for the type that maps to the 10445 // built-in complex data type), the characteristic data type is int. 10446 // d) If none of the above three cases is applicable, the CDT is int. 10447 // The VLEN is then determined based on the CDT and the size of vector 10448 // register of that ISA for which current vector version is generated. The 10449 // VLEN is computed using the formula below: 10450 // VLEN = sizeof(vector_register) / sizeof(CDT), 10451 // where vector register size specified in section 3.2.1 Registers and the 10452 // Stack Frame of original AMD64 ABI document. 10453 QualType RetType = FD->getReturnType(); 10454 if (RetType.isNull()) 10455 return 0; 10456 ASTContext &C = FD->getASTContext(); 10457 QualType CDT; 10458 if (!RetType.isNull() && !RetType->isVoidType()) { 10459 CDT = RetType; 10460 } else { 10461 unsigned Offset = 0; 10462 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10463 if (ParamAttrs[Offset].Kind == Vector) 10464 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10465 ++Offset; 10466 } 10467 if (CDT.isNull()) { 10468 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10469 if (ParamAttrs[I + Offset].Kind == Vector) { 10470 CDT = FD->getParamDecl(I)->getType(); 10471 break; 10472 } 10473 } 10474 } 10475 } 10476 if (CDT.isNull()) 10477 CDT = C.IntTy; 10478 CDT = CDT->getCanonicalTypeUnqualified(); 10479 if (CDT->isRecordType() || CDT->isUnionType()) 10480 CDT = C.IntTy; 10481 return C.getTypeSize(CDT); 10482 } 10483 10484 static void 10485 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10486 const llvm::APSInt &VLENVal, 10487 ArrayRef<ParamAttrTy> ParamAttrs, 10488 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10489 struct ISADataTy { 
10490 char ISA; 10491 unsigned VecRegSize; 10492 }; 10493 ISADataTy ISAData[] = { 10494 { 10495 'b', 128 10496 }, // SSE 10497 { 10498 'c', 256 10499 }, // AVX 10500 { 10501 'd', 256 10502 }, // AVX2 10503 { 10504 'e', 512 10505 }, // AVX512 10506 }; 10507 llvm::SmallVector<char, 2> Masked; 10508 switch (State) { 10509 case OMPDeclareSimdDeclAttr::BS_Undefined: 10510 Masked.push_back('N'); 10511 Masked.push_back('M'); 10512 break; 10513 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10514 Masked.push_back('N'); 10515 break; 10516 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10517 Masked.push_back('M'); 10518 break; 10519 } 10520 for (char Mask : Masked) { 10521 for (const ISADataTy &Data : ISAData) { 10522 SmallString<256> Buffer; 10523 llvm::raw_svector_ostream Out(Buffer); 10524 Out << "_ZGV" << Data.ISA << Mask; 10525 if (!VLENVal) { 10526 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10527 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10528 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10529 } else { 10530 Out << VLENVal; 10531 } 10532 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10533 switch (ParamAttr.Kind){ 10534 case LinearWithVarStride: 10535 Out << 's' << ParamAttr.StrideOrArg; 10536 break; 10537 case Linear: 10538 Out << 'l'; 10539 if (ParamAttr.StrideOrArg != 1) 10540 Out << ParamAttr.StrideOrArg; 10541 break; 10542 case Uniform: 10543 Out << 'u'; 10544 break; 10545 case Vector: 10546 Out << 'v'; 10547 break; 10548 } 10549 if (!!ParamAttr.Alignment) 10550 Out << 'a' << ParamAttr.Alignment; 10551 } 10552 Out << '_' << Fn->getName(); 10553 Fn->addFnAttr(Out.str()); 10554 } 10555 } 10556 } 10557 10558 // This are the Functions that are needed to mangle the name of the 10559 // vector functions generated by the compiler, according to the rules 10560 // defined in the "Vector Function ABI specifications for AArch64", 10561 // available at 10562 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10563 10564 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10565 /// 10566 /// TODO: Need to implement the behavior for reference marked with a 10567 /// var or no linear modifiers (1.b in the section). For this, we 10568 /// need to extend ParamKindTy to support the linear modifiers. 10569 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10570 QT = QT.getCanonicalType(); 10571 10572 if (QT->isVoidType()) 10573 return false; 10574 10575 if (Kind == ParamKindTy::Uniform) 10576 return false; 10577 10578 if (Kind == ParamKindTy::Linear) 10579 return false; 10580 10581 // TODO: Handle linear references with modifiers 10582 10583 if (Kind == ParamKindTy::LinearWithVarStride) 10584 return false; 10585 10586 return true; 10587 } 10588 10589 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10590 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10591 QT = QT.getCanonicalType(); 10592 unsigned Size = C.getTypeSize(QT); 10593 10594 // Only scalars and complex within 16 bytes wide set PVB to true. 10595 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10596 return false; 10597 10598 if (QT->isFloatingType()) 10599 return true; 10600 10601 if (QT->isIntegerType()) 10602 return true; 10603 10604 if (QT->isPointerType()) 10605 return true; 10606 10607 // TODO: Add support for complex types (section 3.1.2, item 2). 10608 10609 return false; 10610 } 10611 10612 /// Computes the lane size (LS) of a return type or of an input parameter, 10613 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10614 /// TODO: Add support for references, section 3.2.1, item 1. 
10615 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10616 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10617 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10618 if (getAArch64PBV(PTy, C)) 10619 return C.getTypeSize(PTy); 10620 } 10621 if (getAArch64PBV(QT, C)) 10622 return C.getTypeSize(QT); 10623 10624 return C.getTypeSize(C.getUIntPtrType()); 10625 } 10626 10627 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10628 // signature of the scalar function, as defined in 3.2.2 of the 10629 // AAVFABI. 10630 static std::tuple<unsigned, unsigned, bool> 10631 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10632 QualType RetType = FD->getReturnType().getCanonicalType(); 10633 10634 ASTContext &C = FD->getASTContext(); 10635 10636 bool OutputBecomesInput = false; 10637 10638 llvm::SmallVector<unsigned, 8> Sizes; 10639 if (!RetType->isVoidType()) { 10640 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10641 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10642 OutputBecomesInput = true; 10643 } 10644 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10645 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10646 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10647 } 10648 10649 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10650 // The LS of a function parameter / return value can only be a power 10651 // of 2, starting from 8 bits, up to 128. 
10652 assert(std::all_of(Sizes.begin(), Sizes.end(), 10653 [](unsigned Size) { 10654 return Size == 8 || Size == 16 || Size == 32 || 10655 Size == 64 || Size == 128; 10656 }) && 10657 "Invalid size"); 10658 10659 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10660 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10661 OutputBecomesInput); 10662 } 10663 10664 /// Mangle the parameter part of the vector function name according to 10665 /// their OpenMP classification. The mangling function is defined in 10666 /// section 3.5 of the AAVFABI. 10667 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10668 SmallString<256> Buffer; 10669 llvm::raw_svector_ostream Out(Buffer); 10670 for (const auto &ParamAttr : ParamAttrs) { 10671 switch (ParamAttr.Kind) { 10672 case LinearWithVarStride: 10673 Out << "ls" << ParamAttr.StrideOrArg; 10674 break; 10675 case Linear: 10676 Out << 'l'; 10677 // Don't print the step value if it is not present or if it is 10678 // equal to 1. 10679 if (ParamAttr.StrideOrArg != 1) 10680 Out << ParamAttr.StrideOrArg; 10681 break; 10682 case Uniform: 10683 Out << 'u'; 10684 break; 10685 case Vector: 10686 Out << 'v'; 10687 break; 10688 } 10689 10690 if (!!ParamAttr.Alignment) 10691 Out << 'a' << ParamAttr.Alignment; 10692 } 10693 10694 return std::string(Out.str()); 10695 } 10696 10697 // Function used to add the attribute. The parameter `VLEN` is 10698 // templated to allow the use of "x" when targeting scalable functions 10699 // for SVE. 
10700 template <typename T> 10701 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10702 char ISA, StringRef ParSeq, 10703 StringRef MangledName, bool OutputBecomesInput, 10704 llvm::Function *Fn) { 10705 SmallString<256> Buffer; 10706 llvm::raw_svector_ostream Out(Buffer); 10707 Out << Prefix << ISA << LMask << VLEN; 10708 if (OutputBecomesInput) 10709 Out << "v"; 10710 Out << ParSeq << "_" << MangledName; 10711 Fn->addFnAttr(Out.str()); 10712 } 10713 10714 // Helper function to generate the Advanced SIMD names depending on 10715 // the value of the NDS when simdlen is not present. 10716 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10717 StringRef Prefix, char ISA, 10718 StringRef ParSeq, StringRef MangledName, 10719 bool OutputBecomesInput, 10720 llvm::Function *Fn) { 10721 switch (NDS) { 10722 case 8: 10723 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10724 OutputBecomesInput, Fn); 10725 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10726 OutputBecomesInput, Fn); 10727 break; 10728 case 16: 10729 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10730 OutputBecomesInput, Fn); 10731 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10732 OutputBecomesInput, Fn); 10733 break; 10734 case 32: 10735 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10736 OutputBecomesInput, Fn); 10737 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10738 OutputBecomesInput, Fn); 10739 break; 10740 case 64: 10741 case 128: 10742 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10743 OutputBecomesInput, Fn); 10744 break; 10745 default: 10746 llvm_unreachable("Scalar type is too wide."); 10747 } 10748 } 10749 10750 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10751 static void emitAArch64DeclareSimdFunction( 10752 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10753 ArrayRef<ParamAttrTy> ParamAttrs, 10754 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10755 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10756 10757 // Get basic data for building the vector signature. 10758 const auto Data = getNDSWDS(FD, ParamAttrs); 10759 const unsigned NDS = std::get<0>(Data); 10760 const unsigned WDS = std::get<1>(Data); 10761 const bool OutputBecomesInput = std::get<2>(Data); 10762 10763 // Check the values provided via `simdlen` by the user. 10764 // 1. A `simdlen(1)` doesn't produce vector signatures, 10765 if (UserVLEN == 1) { 10766 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10767 DiagnosticsEngine::Warning, 10768 "The clause simdlen(1) has no effect when targeting aarch64."); 10769 CGM.getDiags().Report(SLoc, DiagID); 10770 return; 10771 } 10772 10773 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10774 // Advanced SIMD output. 10775 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10776 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10777 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10778 "power of 2 when targeting Advanced SIMD."); 10779 CGM.getDiags().Report(SLoc, DiagID); 10780 return; 10781 } 10782 10783 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10784 // limits. 10785 if (ISA == 's' && UserVLEN != 0) { 10786 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10787 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10788 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10789 "lanes in the architectural constraints " 10790 "for SVE (min is 128-bit, max is " 10791 "2048-bit, by steps of 128-bit)"); 10792 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10793 return; 10794 } 10795 } 10796 10797 // Sort out parameter sequence. 
10798 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10799 StringRef Prefix = "_ZGV"; 10800 // Generate simdlen from user input (if any). 10801 if (UserVLEN) { 10802 if (ISA == 's') { 10803 // SVE generates only a masked function. 10804 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10805 OutputBecomesInput, Fn); 10806 } else { 10807 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10808 // Advanced SIMD generates one or two functions, depending on 10809 // the `[not]inbranch` clause. 10810 switch (State) { 10811 case OMPDeclareSimdDeclAttr::BS_Undefined: 10812 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10813 OutputBecomesInput, Fn); 10814 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10815 OutputBecomesInput, Fn); 10816 break; 10817 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10818 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10819 OutputBecomesInput, Fn); 10820 break; 10821 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10822 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10823 OutputBecomesInput, Fn); 10824 break; 10825 } 10826 } 10827 } else { 10828 // If no user simdlen is provided, follow the AAVFABI rules for 10829 // generating the vector length. 10830 if (ISA == 's') { 10831 // SVE, section 3.4.1, item 1. 10832 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10833 OutputBecomesInput, Fn); 10834 } else { 10835 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10836 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10837 // two vector names depending on the use of the clause 10838 // `[not]inbranch`. 
10839 switch (State) { 10840 case OMPDeclareSimdDeclAttr::BS_Undefined: 10841 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10842 OutputBecomesInput, Fn); 10843 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10844 OutputBecomesInput, Fn); 10845 break; 10846 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10847 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10848 OutputBecomesInput, Fn); 10849 break; 10850 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10851 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10852 OutputBecomesInput, Fn); 10853 break; 10854 } 10855 } 10856 } 10857 } 10858 10859 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10860 llvm::Function *Fn) { 10861 ASTContext &C = CGM.getContext(); 10862 FD = FD->getMostRecentDecl(); 10863 // Map params to their positions in function decl. 10864 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10865 if (isa<CXXMethodDecl>(FD)) 10866 ParamPositions.try_emplace(FD, 0); 10867 unsigned ParamPos = ParamPositions.size(); 10868 for (const ParmVarDecl *P : FD->parameters()) { 10869 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10870 ++ParamPos; 10871 } 10872 while (FD) { 10873 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10874 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10875 // Mark uniform parameters. 10876 for (const Expr *E : Attr->uniforms()) { 10877 E = E->IgnoreParenImpCasts(); 10878 unsigned Pos; 10879 if (isa<CXXThisExpr>(E)) { 10880 Pos = ParamPositions[FD]; 10881 } else { 10882 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10883 ->getCanonicalDecl(); 10884 Pos = ParamPositions[PVD]; 10885 } 10886 ParamAttrs[Pos].Kind = Uniform; 10887 } 10888 // Get alignment info. 
10889 auto NI = Attr->alignments_begin(); 10890 for (const Expr *E : Attr->aligneds()) { 10891 E = E->IgnoreParenImpCasts(); 10892 unsigned Pos; 10893 QualType ParmTy; 10894 if (isa<CXXThisExpr>(E)) { 10895 Pos = ParamPositions[FD]; 10896 ParmTy = E->getType(); 10897 } else { 10898 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10899 ->getCanonicalDecl(); 10900 Pos = ParamPositions[PVD]; 10901 ParmTy = PVD->getType(); 10902 } 10903 ParamAttrs[Pos].Alignment = 10904 (*NI) 10905 ? (*NI)->EvaluateKnownConstInt(C) 10906 : llvm::APSInt::getUnsigned( 10907 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10908 .getQuantity()); 10909 ++NI; 10910 } 10911 // Mark linear parameters. 10912 auto SI = Attr->steps_begin(); 10913 auto MI = Attr->modifiers_begin(); 10914 for (const Expr *E : Attr->linears()) { 10915 E = E->IgnoreParenImpCasts(); 10916 unsigned Pos; 10917 // Rescaling factor needed to compute the linear parameter 10918 // value in the mangled name. 10919 unsigned PtrRescalingFactor = 1; 10920 if (isa<CXXThisExpr>(E)) { 10921 Pos = ParamPositions[FD]; 10922 } else { 10923 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10924 ->getCanonicalDecl(); 10925 Pos = ParamPositions[PVD]; 10926 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10927 PtrRescalingFactor = CGM.getContext() 10928 .getTypeSizeInChars(P->getPointeeType()) 10929 .getQuantity(); 10930 } 10931 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10932 ParamAttr.Kind = Linear; 10933 // Assuming a stride of 1, for `linear` without modifiers. 
10934 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10935 if (*SI) { 10936 Expr::EvalResult Result; 10937 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10938 if (const auto *DRE = 10939 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10940 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10941 ParamAttr.Kind = LinearWithVarStride; 10942 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10943 ParamPositions[StridePVD->getCanonicalDecl()]); 10944 } 10945 } 10946 } else { 10947 ParamAttr.StrideOrArg = Result.Val.getInt(); 10948 } 10949 } 10950 // If we are using a linear clause on a pointer, we need to 10951 // rescale the value of linear_step with the byte size of the 10952 // pointee type. 10953 if (Linear == ParamAttr.Kind) 10954 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 10955 ++SI; 10956 ++MI; 10957 } 10958 llvm::APSInt VLENVal; 10959 SourceLocation ExprLoc; 10960 const Expr *VLENExpr = Attr->getSimdlen(); 10961 if (VLENExpr) { 10962 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10963 ExprLoc = VLENExpr->getExprLoc(); 10964 } 10965 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10966 if (CGM.getTriple().isX86()) { 10967 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10968 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10969 unsigned VLEN = VLENVal.getExtValue(); 10970 StringRef MangledName = Fn->getName(); 10971 if (CGM.getTarget().hasFeature("sve")) 10972 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10973 MangledName, 's', 128, Fn, ExprLoc); 10974 if (CGM.getTarget().hasFeature("neon")) 10975 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10976 MangledName, 'n', 128, Fn, ExprLoc); 10977 } 10978 } 10979 FD = FD->getPreviousDecl(); 10980 } 10981 } 10982 10983 namespace { 10984 /// Cleanup action for doacross support. 
10985 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10986 public: 10987 static const int DoacrossFinArgs = 2; 10988 10989 private: 10990 llvm::FunctionCallee RTLFn; 10991 llvm::Value *Args[DoacrossFinArgs]; 10992 10993 public: 10994 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10995 ArrayRef<llvm::Value *> CallArgs) 10996 : RTLFn(RTLFn) { 10997 assert(CallArgs.size() == DoacrossFinArgs); 10998 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10999 } 11000 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11001 if (!CGF.HaveInsertPoint()) 11002 return; 11003 CGF.EmitRuntimeCall(RTLFn, Args); 11004 } 11005 }; 11006 } // namespace 11007 11008 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11009 const OMPLoopDirective &D, 11010 ArrayRef<Expr *> NumIterations) { 11011 if (!CGF.HaveInsertPoint()) 11012 return; 11013 11014 ASTContext &C = CGM.getContext(); 11015 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11016 RecordDecl *RD; 11017 if (KmpDimTy.isNull()) { 11018 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11019 // kmp_int64 lo; // lower 11020 // kmp_int64 up; // upper 11021 // kmp_int64 st; // stride 11022 // }; 11023 RD = C.buildImplicitRecord("kmp_dim"); 11024 RD->startDefinition(); 11025 addFieldToRecordDecl(C, RD, Int64Ty); 11026 addFieldToRecordDecl(C, RD, Int64Ty); 11027 addFieldToRecordDecl(C, RD, Int64Ty); 11028 RD->completeDefinition(); 11029 KmpDimTy = C.getRecordType(RD); 11030 } else { 11031 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11032 } 11033 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11034 QualType ArrayTy = 11035 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11036 11037 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11038 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11039 enum { LowerFD = 0, UpperFD, StrideFD }; 11040 // Fill dims with data. 
11041 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11042 LValue DimsLVal = CGF.MakeAddrLValue( 11043 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11044 // dims.upper = num_iterations; 11045 LValue UpperLVal = CGF.EmitLValueForField( 11046 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11047 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11048 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11049 Int64Ty, NumIterations[I]->getExprLoc()); 11050 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11051 // dims.stride = 1; 11052 LValue StrideLVal = CGF.EmitLValueForField( 11053 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11054 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11055 StrideLVal); 11056 } 11057 11058 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11059 // kmp_int32 num_dims, struct kmp_dim * dims); 11060 llvm::Value *Args[] = { 11061 emitUpdateLocation(CGF, D.getBeginLoc()), 11062 getThreadID(CGF, D.getBeginLoc()), 11063 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11064 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11065 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11066 CGM.VoidPtrTy)}; 11067 11068 llvm::FunctionCallee RTLFn = 11069 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11070 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11071 CGF.EmitRuntimeCall(RTLFn, Args); 11072 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11073 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11074 llvm::FunctionCallee FiniRTLFn = 11075 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11076 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11077 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11078 llvm::makeArrayRef(FiniArgs)); 11079 } 11080 11081 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11082 const OMPDependClause *C) { 11083 QualType 
Int64Ty = 11084 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11085 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11086 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11087 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11088 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11089 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11090 const Expr *CounterVal = C->getLoopData(I); 11091 assert(CounterVal); 11092 llvm::Value *CntVal = CGF.EmitScalarConversion( 11093 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11094 CounterVal->getExprLoc()); 11095 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11096 /*Volatile=*/false, Int64Ty); 11097 } 11098 llvm::Value *Args[] = { 11099 emitUpdateLocation(CGF, C->getBeginLoc()), 11100 getThreadID(CGF, C->getBeginLoc()), 11101 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11102 llvm::FunctionCallee RTLFn; 11103 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11104 RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11105 CGM.getModule(), OMPRTL___kmpc_doacross_post); 11106 } else { 11107 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11108 RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11109 CGM.getModule(), OMPRTL___kmpc_doacross_wait); 11110 } 11111 CGF.EmitRuntimeCall(RTLFn, Args); 11112 } 11113 11114 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11115 llvm::FunctionCallee Callee, 11116 ArrayRef<llvm::Value *> Args) const { 11117 assert(Loc.isValid() && "Outlined function call location must be valid."); 11118 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11119 11120 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11121 if (Fn->doesNotThrow()) { 11122 CGF.EmitNounwindRuntimeCall(Fn, Args); 11123 return; 11124 } 11125 } 11126 CGF.EmitRuntimeCall(Callee, Args); 11127 } 11128 11129 void CGOpenMPRuntime::emitOutlinedFunctionCall( 
11130 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11131 ArrayRef<llvm::Value *> Args) const { 11132 emitCall(CGF, Loc, OutlinedFn, Args); 11133 } 11134 11135 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11136 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11137 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11138 HasEmittedDeclareTargetRegion = true; 11139 } 11140 11141 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11142 const VarDecl *NativeParam, 11143 const VarDecl *TargetParam) const { 11144 return CGF.GetAddrOfLocalVar(NativeParam); 11145 } 11146 11147 namespace { 11148 /// Cleanup action for allocate support. 11149 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11150 public: 11151 static const int CleanupArgs = 3; 11152 11153 private: 11154 llvm::FunctionCallee RTLFn; 11155 llvm::Value *Args[CleanupArgs]; 11156 11157 public: 11158 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11159 ArrayRef<llvm::Value *> CallArgs) 11160 : RTLFn(RTLFn) { 11161 assert(CallArgs.size() == CleanupArgs && 11162 "Size of arguments does not match."); 11163 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11164 } 11165 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11166 if (!CGF.HaveInsertPoint()) 11167 return; 11168 CGF.EmitRuntimeCall(RTLFn, Args); 11169 } 11170 }; 11171 } // namespace 11172 11173 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11174 const VarDecl *VD) { 11175 if (!VD) 11176 return Address::invalid(); 11177 const VarDecl *CVD = VD->getCanonicalDecl(); 11178 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11179 return Address::invalid(); 11180 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11181 // Use the default allocation. 
11182 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11183 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11184 !AA->getAllocator()) 11185 return Address::invalid(); 11186 llvm::Value *Size; 11187 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11188 if (CVD->getType()->isVariablyModifiedType()) { 11189 Size = CGF.getTypeSize(CVD->getType()); 11190 // Align the size: ((size + align - 1) / align) * align 11191 Size = CGF.Builder.CreateNUWAdd( 11192 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11193 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11194 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11195 } else { 11196 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11197 Size = CGM.getSize(Sz.alignTo(Align)); 11198 } 11199 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11200 assert(AA->getAllocator() && 11201 "Expected allocator expression for non-default allocator."); 11202 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11203 // According to the standard, the original allocator type is a enum (integer). 11204 // Convert to pointer type, if required. 
11205 if (Allocator->getType()->isIntegerTy()) 11206 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11207 else if (Allocator->getType()->isPointerTy()) 11208 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11209 CGM.VoidPtrTy); 11210 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11211 11212 llvm::Value *Addr = 11213 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 11214 CGM.getModule(), OMPRTL___kmpc_alloc), 11215 Args, getName({CVD->getName(), ".void.addr"})); 11216 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11217 Allocator}; 11218 llvm::FunctionCallee FiniRTLFn = 11219 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(), 11220 OMPRTL___kmpc_free); 11221 11222 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11223 llvm::makeArrayRef(FiniArgs)); 11224 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11225 Addr, 11226 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11227 getName({CVD->getName(), ".addr"})); 11228 return Address(Addr, Align); 11229 } 11230 11231 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11232 CodeGenModule &CGM, const OMPLoopDirective &S) 11233 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11234 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11235 if (!NeedToPush) 11236 return; 11237 NontemporalDeclsSet &DS = 11238 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11239 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11240 for (const Stmt *Ref : C->private_refs()) { 11241 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11242 const ValueDecl *VD; 11243 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11244 VD = DRE->getDecl(); 11245 } else { 11246 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11247 assert((ME->isImplicitCXXThis() || 11248 
isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11249 "Expected member of current class."); 11250 VD = ME->getMemberDecl(); 11251 } 11252 DS.insert(VD); 11253 } 11254 } 11255 } 11256 11257 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11258 if (!NeedToPush) 11259 return; 11260 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11261 } 11262 11263 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11264 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11265 11266 return llvm::any_of( 11267 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11268 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11269 } 11270 11271 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11272 const OMPExecutableDirective &S, 11273 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11274 const { 11275 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11276 // Vars in target/task regions must be excluded completely. 11277 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11278 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11279 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11280 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11281 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11282 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11283 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11284 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11285 } 11286 } 11287 // Exclude vars in private clauses. 
11288 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11289 for (const Expr *Ref : C->varlists()) { 11290 if (!Ref->getType()->isScalarType()) 11291 continue; 11292 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11293 if (!DRE) 11294 continue; 11295 NeedToCheckForLPCs.insert(DRE->getDecl()); 11296 } 11297 } 11298 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11299 for (const Expr *Ref : C->varlists()) { 11300 if (!Ref->getType()->isScalarType()) 11301 continue; 11302 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11303 if (!DRE) 11304 continue; 11305 NeedToCheckForLPCs.insert(DRE->getDecl()); 11306 } 11307 } 11308 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11309 for (const Expr *Ref : C->varlists()) { 11310 if (!Ref->getType()->isScalarType()) 11311 continue; 11312 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11313 if (!DRE) 11314 continue; 11315 NeedToCheckForLPCs.insert(DRE->getDecl()); 11316 } 11317 } 11318 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11319 for (const Expr *Ref : C->varlists()) { 11320 if (!Ref->getType()->isScalarType()) 11321 continue; 11322 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11323 if (!DRE) 11324 continue; 11325 NeedToCheckForLPCs.insert(DRE->getDecl()); 11326 } 11327 } 11328 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11329 for (const Expr *Ref : C->varlists()) { 11330 if (!Ref->getType()->isScalarType()) 11331 continue; 11332 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11333 if (!DRE) 11334 continue; 11335 NeedToCheckForLPCs.insert(DRE->getDecl()); 11336 } 11337 } 11338 for (const Decl *VD : NeedToCheckForLPCs) { 11339 for (const LastprivateConditionalData &Data : 11340 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11341 if (Data.DeclToUniqueName.count(VD) > 0) { 11342 if (!Data.Disabled) 11343 
NeedToAddForLPCsAsDisabled.insert(VD); 11344 break; 11345 } 11346 } 11347 } 11348 } 11349 11350 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11351 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11352 : CGM(CGF.CGM), 11353 Action((CGM.getLangOpts().OpenMP >= 50 && 11354 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11355 [](const OMPLastprivateClause *C) { 11356 return C->getKind() == 11357 OMPC_LASTPRIVATE_conditional; 11358 })) 11359 ? ActionToDo::PushAsLastprivateConditional 11360 : ActionToDo::DoNotPush) { 11361 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11362 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11363 return; 11364 assert(Action == ActionToDo::PushAsLastprivateConditional && 11365 "Expected a push action."); 11366 LastprivateConditionalData &Data = 11367 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11368 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11369 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11370 continue; 11371 11372 for (const Expr *Ref : C->varlists()) { 11373 Data.DeclToUniqueName.insert(std::make_pair( 11374 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11375 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11376 } 11377 } 11378 Data.IVLVal = IVLVal; 11379 Data.Fn = CGF.CurFn; 11380 } 11381 11382 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11383 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11384 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11385 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11386 if (CGM.getLangOpts().OpenMP < 50) 11387 return; 11388 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11389 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11390 if (!NeedToAddForLPCsAsDisabled.empty()) { 11391 Action = ActionToDo::DisableLastprivateConditional; 11392 
LastprivateConditionalData &Data = 11393 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11394 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11395 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11396 Data.Fn = CGF.CurFn; 11397 Data.Disabled = true; 11398 } 11399 } 11400 11401 CGOpenMPRuntime::LastprivateConditionalRAII 11402 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11403 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11404 return LastprivateConditionalRAII(CGF, S); 11405 } 11406 11407 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11408 if (CGM.getLangOpts().OpenMP < 50) 11409 return; 11410 if (Action == ActionToDo::DisableLastprivateConditional) { 11411 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11412 "Expected list of disabled private vars."); 11413 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11414 } 11415 if (Action == ActionToDo::PushAsLastprivateConditional) { 11416 assert( 11417 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11418 "Expected list of lastprivate conditional vars."); 11419 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11420 } 11421 } 11422 11423 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11424 const VarDecl *VD) { 11425 ASTContext &C = CGM.getContext(); 11426 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11427 if (I == LastprivateConditionalToTypes.end()) 11428 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11429 QualType NewType; 11430 const FieldDecl *VDField; 11431 const FieldDecl *FiredField; 11432 LValue BaseLVal; 11433 auto VI = I->getSecond().find(VD); 11434 if (VI == I->getSecond().end()) { 11435 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11436 RD->startDefinition(); 11437 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11438 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11439 RD->completeDefinition(); 11440 NewType = C.getRecordType(RD); 11441 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11442 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11443 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11444 } else { 11445 NewType = std::get<0>(VI->getSecond()); 11446 VDField = std::get<1>(VI->getSecond()); 11447 FiredField = std::get<2>(VI->getSecond()); 11448 BaseLVal = std::get<3>(VI->getSecond()); 11449 } 11450 LValue FiredLVal = 11451 CGF.EmitLValueForField(BaseLVal, FiredField); 11452 CGF.EmitStoreOfScalar( 11453 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11454 FiredLVal); 11455 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11456 } 11457 11458 namespace { 11459 /// Checks if the lastprivate conditional variable is referenced in LHS. 11460 class LastprivateConditionalRefChecker final 11461 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11462 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11463 const Expr *FoundE = nullptr; 11464 const Decl *FoundD = nullptr; 11465 StringRef UniqueDeclName; 11466 LValue IVLVal; 11467 llvm::Function *FoundFn = nullptr; 11468 SourceLocation Loc; 11469 11470 public: 11471 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11472 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11473 llvm::reverse(LPM)) { 11474 auto It = D.DeclToUniqueName.find(E->getDecl()); 11475 if (It == D.DeclToUniqueName.end()) 11476 continue; 11477 if (D.Disabled) 11478 return false; 11479 FoundE = E; 11480 FoundD = E->getDecl()->getCanonicalDecl(); 11481 UniqueDeclName = It->second; 11482 IVLVal = D.IVLVal; 11483 FoundFn = D.Fn; 11484 break; 11485 } 11486 return FoundE == E; 11487 } 11488 bool VisitMemberExpr(const MemberExpr *E) { 11489 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11490 return false; 11491 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11492 llvm::reverse(LPM)) { 11493 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11494 if (It == D.DeclToUniqueName.end()) 11495 continue; 11496 if (D.Disabled) 11497 return false; 11498 FoundE = E; 11499 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11500 UniqueDeclName = It->second; 11501 IVLVal = D.IVLVal; 11502 FoundFn = D.Fn; 11503 break; 11504 } 11505 return FoundE == E; 11506 } 11507 bool VisitStmt(const Stmt *S) { 11508 for (const Stmt *Child : S->children()) { 11509 if (!Child) 11510 continue; 11511 if (const auto *E = dyn_cast<Expr>(Child)) 11512 if (!E->isGLValue()) 11513 continue; 11514 if (Visit(Child)) 11515 return true; 11516 } 11517 return false; 11518 } 11519 explicit LastprivateConditionalRefChecker( 11520 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11521 : LPM(LPM) {} 11522 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11523 getFoundData() const { 11524 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11525 } 11526 }; 11527 } // namespace 11528 11529 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11530 LValue IVLVal, 11531 StringRef UniqueDeclName, 11532 LValue LVal, 11533 SourceLocation Loc) { 11534 // Last updated loop counter for the lastprivate conditional var. 11535 // int<xx> last_iv = 0; 11536 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11537 llvm::Constant *LastIV = 11538 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11539 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11540 IVLVal.getAlignment().getAsAlign()); 11541 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11542 11543 // Last value of the lastprivate conditional. 
11544 // decltype(priv_a) last_a; 11545 llvm::Constant *Last = getOrCreateInternalVariable( 11546 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11547 cast<llvm::GlobalVariable>(Last)->setAlignment( 11548 LVal.getAlignment().getAsAlign()); 11549 LValue LastLVal = 11550 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11551 11552 // Global loop counter. Required to handle inner parallel-for regions. 11553 // iv 11554 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11555 11556 // #pragma omp critical(a) 11557 // if (last_iv <= iv) { 11558 // last_iv = iv; 11559 // last_a = priv_a; 11560 // } 11561 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11562 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11563 Action.Enter(CGF); 11564 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11565 // (last_iv <= iv) ? Check if the variable is updated and store new 11566 // value in global var. 11567 llvm::Value *CmpRes; 11568 if (IVLVal.getType()->isSignedIntegerType()) { 11569 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11570 } else { 11571 assert(IVLVal.getType()->isUnsignedIntegerType() && 11572 "Loop iteration variable must be integer."); 11573 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11574 } 11575 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11576 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11577 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11578 // { 11579 CGF.EmitBlock(ThenBB); 11580 11581 // last_iv = iv; 11582 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11583 11584 // last_a = priv_a; 11585 switch (CGF.getEvaluationKind(LVal.getType())) { 11586 case TEK_Scalar: { 11587 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11588 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11589 break; 11590 } 11591 case TEK_Complex: { 11592 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11593 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11594 break; 11595 } 11596 case TEK_Aggregate: 11597 llvm_unreachable( 11598 "Aggregates are not supported in lastprivate conditional."); 11599 } 11600 // } 11601 CGF.EmitBranch(ExitBB); 11602 // There is no need to emit line number for unconditional branch. 11603 (void)ApplyDebugLocation::CreateEmpty(CGF); 11604 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11605 }; 11606 11607 if (CGM.getLangOpts().OpenMPSimd) { 11608 // Do not emit as a critical region as no parallel region could be emitted. 11609 RegionCodeGenTy ThenRCG(CodeGen); 11610 ThenRCG(CGF); 11611 } else { 11612 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11613 } 11614 } 11615 11616 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11617 const Expr *LHS) { 11618 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11619 return; 11620 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11621 if (!Checker.Visit(LHS)) 11622 return; 11623 const Expr *FoundE; 11624 const Decl *FoundD; 11625 StringRef UniqueDeclName; 11626 LValue IVLVal; 11627 llvm::Function *FoundFn; 11628 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11629 Checker.getFoundData(); 11630 if (FoundFn != CGF.CurFn) { 11631 // Special codegen for inner parallel regions. 
11632 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11633 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11634 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11635 "Lastprivate conditional is not found in outer region."); 11636 QualType StructTy = std::get<0>(It->getSecond()); 11637 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11638 LValue PrivLVal = CGF.EmitLValue(FoundE); 11639 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11640 PrivLVal.getAddress(CGF), 11641 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11642 LValue BaseLVal = 11643 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11644 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11645 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11646 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11647 FiredLVal, llvm::AtomicOrdering::Unordered, 11648 /*IsVolatile=*/true, /*isInit=*/false); 11649 return; 11650 } 11651 11652 // Private address of the lastprivate conditional in the current context. 
11653 // priv_a 11654 LValue LVal = CGF.EmitLValue(FoundE); 11655 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11656 FoundE->getExprLoc()); 11657 } 11658 11659 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11660 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11661 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11662 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11663 return; 11664 auto Range = llvm::reverse(LastprivateConditionalStack); 11665 auto It = llvm::find_if( 11666 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11667 if (It == Range.end() || It->Fn != CGF.CurFn) 11668 return; 11669 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11670 assert(LPCI != LastprivateConditionalToTypes.end() && 11671 "Lastprivates must be registered already."); 11672 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11673 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11674 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11675 for (const auto &Pair : It->DeclToUniqueName) { 11676 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11677 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11678 continue; 11679 auto I = LPCI->getSecond().find(Pair.first); 11680 assert(I != LPCI->getSecond().end() && 11681 "Lastprivate must be rehistered already."); 11682 // bool Cmp = priv_a.Fired != 0; 11683 LValue BaseLVal = std::get<3>(I->getSecond()); 11684 LValue FiredLVal = 11685 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11686 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11687 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11688 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11689 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11690 // if (Cmp) { 11691 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11692 CGF.EmitBlock(ThenBB); 
11693 Address Addr = CGF.GetAddrOfLocalVar(VD); 11694 LValue LVal; 11695 if (VD->getType()->isReferenceType()) 11696 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11697 AlignmentSource::Decl); 11698 else 11699 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11700 AlignmentSource::Decl); 11701 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11702 D.getBeginLoc()); 11703 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11704 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11705 // } 11706 } 11707 } 11708 11709 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11710 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11711 SourceLocation Loc) { 11712 if (CGF.getLangOpts().OpenMP < 50) 11713 return; 11714 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11715 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11716 "Unknown lastprivate conditional variable."); 11717 StringRef UniqueName = It->second; 11718 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11719 // The variable was not updated in the region - exit. 
11720 if (!GV) 11721 return; 11722 LValue LPLVal = CGF.MakeAddrLValue( 11723 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11724 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11725 CGF.EmitStoreOfScalar(Res, PrivLVal); 11726 } 11727 11728 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11729 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11730 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11731 llvm_unreachable("Not supported in SIMD-only mode"); 11732 } 11733 11734 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11735 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11736 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11737 llvm_unreachable("Not supported in SIMD-only mode"); 11738 } 11739 11740 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11741 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11742 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11743 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11744 bool Tied, unsigned &NumberOfParts) { 11745 llvm_unreachable("Not supported in SIMD-only mode"); 11746 } 11747 11748 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11749 SourceLocation Loc, 11750 llvm::Function *OutlinedFn, 11751 ArrayRef<llvm::Value *> CapturedVars, 11752 const Expr *IfCond) { 11753 llvm_unreachable("Not supported in SIMD-only mode"); 11754 } 11755 11756 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11757 CodeGenFunction &CGF, StringRef CriticalName, 11758 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11759 const Expr *Hint) { 11760 llvm_unreachable("Not supported in SIMD-only mode"); 11761 } 11762 11763 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11764 const RegionCodeGenTy &MasterOpGen, 11765 SourceLocation Loc) { 11766 llvm_unreachable("Not supported in SIMD-only mode"); 11767 } 11768 11769 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11770 SourceLocation Loc) { 11771 llvm_unreachable("Not supported in SIMD-only mode"); 11772 } 11773 11774 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11775 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11776 SourceLocation Loc) { 11777 llvm_unreachable("Not supported in SIMD-only mode"); 11778 } 11779 11780 void CGOpenMPSIMDRuntime::emitSingleRegion( 11781 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11782 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11783 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11784 ArrayRef<const Expr *> AssignmentOps) { 11785 llvm_unreachable("Not supported in SIMD-only mode"); 11786 } 11787 11788 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11789 const RegionCodeGenTy &OrderedOpGen, 11790 SourceLocation Loc, 11791 bool IsThreads) { 11792 llvm_unreachable("Not supported in SIMD-only mode"); 11793 } 11794 11795 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11796 SourceLocation Loc, 11797 OpenMPDirectiveKind Kind, 11798 bool EmitChecks, 11799 bool ForceSimpleCall) { 11800 llvm_unreachable("Not supported in SIMD-only mode"); 11801 } 11802 11803 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11804 CodeGenFunction &CGF, SourceLocation Loc, 11805 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11806 bool Ordered, const DispatchRTInput &DispatchValues) { 11807 llvm_unreachable("Not supported in SIMD-only mode"); 11808 } 11809 11810 void CGOpenMPSIMDRuntime::emitForStaticInit( 11811 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11812 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11813 llvm_unreachable("Not supported in SIMD-only mode"); 11814 } 11815 11816 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11817 CodeGenFunction &CGF, SourceLocation Loc, 11818 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11819 llvm_unreachable("Not supported in SIMD-only mode"); 11820 } 11821 11822 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11823 SourceLocation Loc, 11824 unsigned IVSize, 11825 bool IVSigned) { 11826 llvm_unreachable("Not supported in SIMD-only mode"); 11827 } 11828 11829 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11830 SourceLocation Loc, 11831 OpenMPDirectiveKind DKind) { 11832 llvm_unreachable("Not supported in SIMD-only mode"); 11833 } 11834 11835 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11836 SourceLocation Loc, 11837 unsigned IVSize, bool IVSigned, 11838 Address IL, Address LB, 11839 Address UB, Address ST) { 11840 llvm_unreachable("Not supported in SIMD-only mode"); 11841 } 11842 11843 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11844 llvm::Value *NumThreads, 11845 SourceLocation Loc) { 11846 llvm_unreachable("Not supported in SIMD-only mode"); 11847 } 11848 11849 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11850 ProcBindKind ProcBind, 11851 SourceLocation Loc) { 11852 llvm_unreachable("Not supported in SIMD-only mode"); 11853 } 11854 11855 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11856 const VarDecl *VD, 11857 Address VDAddr, 11858 SourceLocation Loc) { 11859 llvm_unreachable("Not supported in SIMD-only mode"); 11860 } 11861 11862 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11863 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11864 CodeGenFunction *CGF) { 11865 llvm_unreachable("Not supported in SIMD-only mode"); 11866 } 11867 11868 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11869 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11870 llvm_unreachable("Not supported in SIMD-only mode"); 11871 } 11872 11873 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11874 ArrayRef<const Expr *> 
Vars, 11875 SourceLocation Loc, 11876 llvm::AtomicOrdering AO) { 11877 llvm_unreachable("Not supported in SIMD-only mode"); 11878 } 11879 11880 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11881 const OMPExecutableDirective &D, 11882 llvm::Function *TaskFunction, 11883 QualType SharedsTy, Address Shareds, 11884 const Expr *IfCond, 11885 const OMPTaskDataTy &Data) { 11886 llvm_unreachable("Not supported in SIMD-only mode"); 11887 } 11888 11889 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11890 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11891 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11892 const Expr *IfCond, const OMPTaskDataTy &Data) { 11893 llvm_unreachable("Not supported in SIMD-only mode"); 11894 } 11895 11896 void CGOpenMPSIMDRuntime::emitReduction( 11897 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11898 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11899 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11900 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11901 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11902 ReductionOps, Options); 11903 } 11904 11905 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11906 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11907 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11908 llvm_unreachable("Not supported in SIMD-only mode"); 11909 } 11910 11911 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 11912 SourceLocation Loc, 11913 bool IsWorksharingReduction) { 11914 llvm_unreachable("Not supported in SIMD-only mode"); 11915 } 11916 11917 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11918 SourceLocation Loc, 11919 ReductionCodeGen &RCG, 11920 unsigned N) { 11921 llvm_unreachable("Not supported in SIMD-only mode"); 11922 } 11923 
11924 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11925 SourceLocation Loc, 11926 llvm::Value *ReductionsPtr, 11927 LValue SharedLVal) { 11928 llvm_unreachable("Not supported in SIMD-only mode"); 11929 } 11930 11931 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11932 SourceLocation Loc) { 11933 llvm_unreachable("Not supported in SIMD-only mode"); 11934 } 11935 11936 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11937 CodeGenFunction &CGF, SourceLocation Loc, 11938 OpenMPDirectiveKind CancelRegion) { 11939 llvm_unreachable("Not supported in SIMD-only mode"); 11940 } 11941 11942 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11943 SourceLocation Loc, const Expr *IfCond, 11944 OpenMPDirectiveKind CancelRegion) { 11945 llvm_unreachable("Not supported in SIMD-only mode"); 11946 } 11947 11948 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11949 const OMPExecutableDirective &D, StringRef ParentName, 11950 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11951 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11952 llvm_unreachable("Not supported in SIMD-only mode"); 11953 } 11954 11955 void CGOpenMPSIMDRuntime::emitTargetCall( 11956 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11957 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11958 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 11959 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11960 const OMPLoopDirective &D)> 11961 SizeEmitter) { 11962 llvm_unreachable("Not supported in SIMD-only mode"); 11963 } 11964 11965 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11966 llvm_unreachable("Not supported in SIMD-only mode"); 11967 } 11968 11969 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11970 llvm_unreachable("Not supported in SIMD-only mode"); 11971 } 11972 11973 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
11974 return false; 11975 } 11976 11977 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11978 const OMPExecutableDirective &D, 11979 SourceLocation Loc, 11980 llvm::Function *OutlinedFn, 11981 ArrayRef<llvm::Value *> CapturedVars) { 11982 llvm_unreachable("Not supported in SIMD-only mode"); 11983 } 11984 11985 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11986 const Expr *NumTeams, 11987 const Expr *ThreadLimit, 11988 SourceLocation Loc) { 11989 llvm_unreachable("Not supported in SIMD-only mode"); 11990 } 11991 11992 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11993 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11994 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11995 llvm_unreachable("Not supported in SIMD-only mode"); 11996 } 11997 11998 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11999 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12000 const Expr *Device) { 12001 llvm_unreachable("Not supported in SIMD-only mode"); 12002 } 12003 12004 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12005 const OMPLoopDirective &D, 12006 ArrayRef<Expr *> NumIterations) { 12007 llvm_unreachable("Not supported in SIMD-only mode"); 12008 } 12009 12010 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12011 const OMPDependClause *C) { 12012 llvm_unreachable("Not supported in SIMD-only mode"); 12013 } 12014 12015 const VarDecl * 12016 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12017 const VarDecl *NativeParam) const { 12018 llvm_unreachable("Not supported in SIMD-only mode"); 12019 } 12020 12021 Address 12022 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12023 const VarDecl *NativeParam, 12024 const VarDecl *TargetParam) const { 12025 llvm_unreachable("Not supported in SIMD-only mode"); 12026 } 12027