//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the switch for untied task parts; no-op by default, overridden by
  /// task-outlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: all OpenMP region infos share the CR_OpenMP
  /// captured-region kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which flavor of OpenMP region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that introduced the region.
  OpenMPDirectiveKind Kind;
  /// True if the region may be exited via a 'cancel' construct.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id dispatch switch used to resume an
  /// untied task at the correct switching point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding the current task part id (int32*).
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; a new case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id, run the user callback, then register this
        // resume point as a new case on the dispatch switch.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same pointer as OldCSI when the outer info is itself an OpenMP region;
  /// null otherwise.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved CodeGenFunction state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
574 class CleanupTy final : public EHScopeStack::Cleanup { 575 PrePostActionTy *Action; 576 577 public: 578 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 579 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 580 if (!CGF.HaveInsertPoint()) 581 return; 582 Action->Exit(CGF); 583 } 584 }; 585 586 } // anonymous namespace 587 588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 589 CodeGenFunction::RunCleanupsScope Scope(CGF); 590 if (PrePostAction) { 591 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 592 Callback(CodeGen, CGF, *PrePostAction); 593 } else { 594 PrePostActionTy Action; 595 Callback(CodeGen, CGF, Action); 596 } 597 } 598 599 /// Check if the combiner is a call to UDR combiner and if it is so return the 600 /// UDR decl used for reduction. 601 static const OMPDeclareReductionDecl * 602 getReductionInit(const Expr *ReductionOp) { 603 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 604 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 605 if (const auto *DRE = 606 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 607 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 608 return DRD; 609 return nullptr; 610 } 611 612 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 613 const OMPDeclareReductionDecl *DRD, 614 const Expr *InitOp, 615 Address Private, Address Original, 616 QualType Ty) { 617 if (DRD->getInitializer()) { 618 std::pair<llvm::Function *, llvm::Function *> Reduction = 619 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 620 const auto *CE = cast<CallExpr>(InitOp); 621 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 622 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 623 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 624 const auto *LHSDRE = 625 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 626 const auto *RHSDRE = 627 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 628 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 629 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 630 [=]() { return Private; }); 631 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 632 [=]() { return Original; }); 633 (void)PrivateScope.Privatize(); 634 RValue Func = RValue::get(Reduction.second); 635 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 636 CGF.EmitIgnoredExpr(InitOp); 637 } else { 638 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 639 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 640 auto *GV = new llvm::GlobalVariable( 641 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 642 llvm::GlobalValue::PrivateLinkage, Init, Name); 643 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 644 RValue InitRVal; 645 switch (CGF.getEvaluationKind(Ty)) { 646 case TEK_Scalar: 647 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 648 break; 649 case TEK_Complex: 650 InitRVal = 651 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 652 break; 653 case TEK_Aggregate: 654 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 655 break; 656 } 657 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 658 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 659 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 660 /*IsInitializer=*/false); 661 } 662 } 663 664 /// Emit initialization of arrays of complex types. 665 /// \param DestAddr Address of the array. 666 /// \param Type Type of array. 667 /// \param Init Initial expression of array. 668 /// \param SrcAddr Address of the original array. 669 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 670 QualType Type, bool EmitDeclareReductionInit, 671 const Expr *Init, 672 const OMPDeclareReductionDecl *DRD, 673 Address SrcAddr = Address::invalid()) { 674 // Perform element-by-element initialization. 
675 QualType ElementTy; 676 677 // Drill down to the base element type on both arrays. 678 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 679 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 680 DestAddr = 681 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 682 if (DRD) 683 SrcAddr = 684 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 685 686 llvm::Value *SrcBegin = nullptr; 687 if (DRD) 688 SrcBegin = SrcAddr.getPointer(); 689 llvm::Value *DestBegin = DestAddr.getPointer(); 690 // Cast from pointer to array type to pointer to single element. 691 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 692 // The basic structure here is a while-do loop. 693 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 694 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 695 llvm::Value *IsEmpty = 696 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 697 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 698 699 // Enter the loop body, making that address the current address. 
700 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 701 CGF.EmitBlock(BodyBB); 702 703 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 704 705 llvm::PHINode *SrcElementPHI = nullptr; 706 Address SrcElementCurrent = Address::invalid(); 707 if (DRD) { 708 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 709 "omp.arraycpy.srcElementPast"); 710 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 711 SrcElementCurrent = 712 Address(SrcElementPHI, 713 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 714 } 715 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 716 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 717 DestElementPHI->addIncoming(DestBegin, EntryBB); 718 Address DestElementCurrent = 719 Address(DestElementPHI, 720 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 721 722 // Emit copy. 723 { 724 CodeGenFunction::RunCleanupsScope InitScope(CGF); 725 if (EmitDeclareReductionInit) { 726 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 727 SrcElementCurrent, ElementTy); 728 } else 729 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 730 /*IsInitializer=*/false); 731 } 732 733 if (DRD) { 734 // Shift the address forward by one element. 735 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 736 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 737 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 738 } 739 740 // Shift the address forward by one element. 741 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 742 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 743 // Check whether we've reached the end. 744 llvm::Value *Done = 745 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 746 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 747 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 748 749 // Done. 
750 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 751 } 752 753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 754 return CGF.EmitOMPSharedLValue(E); 755 } 756 757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 758 const Expr *E) { 759 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 760 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 761 return LValue(); 762 } 763 764 void ReductionCodeGen::emitAggregateInitialization( 765 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 766 const OMPDeclareReductionDecl *DRD) { 767 // Emit VarDecl with copy init for arrays. 768 // Get the address of the original variable captured in current 769 // captured region. 770 const auto *PrivateVD = 771 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 772 bool EmitDeclareReductionInit = 773 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 774 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 775 EmitDeclareReductionInit, 776 EmitDeclareReductionInit ? 
                           ClausesData[N].ReductionOp
                               : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

// Collects the per-clause data (shared expr, original expr, private copy and
// reduction op) for every reduction item and pre-reserves the bookkeeping
// vectors. The four input arrays are parallel: element i of each describes
// reduction item i.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

// Emits the lvalues (begin/UB pair) for the shared and original variants of
// reduction item N and records them. Items must be emitted in order: exactly
// N items must have been emitted before item N (asserted below).
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // Shared and original expressions coincide - reuse the emitted lvalues.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

// Computes and records the size of reduction item N (both size-in-chars and
// element count) and, for variably modified private types, emits the type
// with the computed element count bound to its VLA size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically sized type: record the size only; no runtime element count
    // is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // elements = (UB - begin) + 1; chars = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: take its type size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the just-computed element count to the VLA size expression while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

// Overload used when the element count was computed elsewhere: only emits the
// variably modified private type with \p Size bound to its VLA size
// expression. \p Size must be null for non-variably modified items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

// Emits initialization of the private copy of reduction item N. Dispatches,
// in order: aggregate initialization for array types, a user-defined
// reduction (declare reduction) initializer, the caller-provided DefaultInit
// callback, and finally the private variable's own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their respective
  // types before initializing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

// Returns true if the private copy of reduction item N has a type that
// requires destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

// Pushes a destructor cleanup for the private copy of reduction item N, if
// its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

// Strips pointer/reference levels from \p BaseLV (loading through each one)
// until the pointee type matches \p ElTy, then returns an lvalue for that
// address cast to the memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load through it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

// Rebuilds the pointer/reference indirection chain of \p BaseTy (down to
// \p ElTy) around \p Addr: allocates one temporary per level, chains them via
// stores, stores \p Addr (cast to the innermost level's type) at the bottom,
// and returns the outermost temporary. If no indirection is needed, returns
// \p Addr with \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

// Returns the base variable of an array-section or array-subscript expression
// \p Ref (looking through nested sections/subscripts) and sets \p DE to its
// DeclRefExpr; returns nullptr (leaving \p DE unset) for other expressions.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

// For reduction items that are array sections/subscripts, offsets the private
// address by the distance between the section start and the base variable so
// the private copy can be addressed through the same base expression shape.
// Records the base declaration of item N as a side effect.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) from the shared section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

// Returns true if item N's reduction op has a user-defined (declare
// reduction) initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

// Loads through the kmp_int32* thread-id parameter of the outlined region to
// produce an lvalue for the thread id itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

// Emits the captured statement body of an OpenMP region, wrapped in a
// terminate scope (see the terminology note below).
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce the single-entry/single-exit rule: any exception escaping the
  // structured block terminates.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

// In task regions the thread id is passed by value, not by pointer, so no
// load through a pointer is needed (contrast CGOpenMPRegionInfo's version).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

// Appends a public, unnamed, non-bitfield field of type \p FieldTy to the
// record \p DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

// Builds the implicit ident_t record type used by libomp entry points,
// initializes the OpenMPIRBuilder's types and loads any offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  llvm::omp::types::initializeTypes(CGM.getModule());
  loadOffloadInfoMetadata();
}

// Clears cached internal variables and erases unused non-target global
// variable declarations that were emitted only for debug info.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions and any declaration that is still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

// Joins \p Parts into a runtime entity name, using FirstSeparator before the
// first part and Separator between the rest.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

// Emits the outlined combiner or initializer function of a user-defined
// reduction: void .omp_combiner.(Ty *in, Ty *out) (or .omp_initializer.),
// mapping the declare-reduction's omp_in/omp_out (or priv/orig) variables to
// the two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
                    "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny forwarders; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer, default-initialize the 'priv' variable first if it
  // has a non-trivial initializer of its own.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emits (once) the combiner and, if present, the initializer functions for a
// 'declare reduction' and caches them in UDRMap. When emitted inside a
// function \p CGF, also records the decl so functionFinished can drop it.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

// Returns the cached (combiner, initializer) pair for \p D, emitting it on
// demand.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

// Shared worker for 'parallel' and 'teams' outlining: determines whether the
// directive may be cancelled, then outlines the captured statement into a
// function taking the thread id pointer.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Probe every directive kind that can carry a 'cancel' inside a parallel
  // region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel =
OPFD->hasCancel(); 1270 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1271 HasCancel = OPFD->hasCancel(); 1272 else if (const auto *OPFD = 1273 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1274 HasCancel = OPFD->hasCancel(); 1275 else if (const auto *OPFD = 1276 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1277 HasCancel = OPFD->hasCancel(); 1278 1279 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1280 // parallel region to make cancellation barriers work properly. 1281 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1282 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1283 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1284 HasCancel, OutlinedHelperName); 1285 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1286 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1287 } 1288 1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1290 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1291 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1292 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1293 return emitParallelOrTeamsOutlinedFunction( 1294 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1295 } 1296 1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1298 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1299 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1300 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1301 return emitParallelOrTeamsOutlinedFunction( 1302 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1303 } 1304 1305 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1306 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1307 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1308 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1309 bool Tied, unsigned &NumberOfParts) { 1310 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1311 PrePostActionTy &) { 1312 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1313 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1314 llvm::Value *TaskArgs[] = { 1315 UpLoc, ThreadID, 1316 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1317 TaskTVar->getType()->castAs<PointerType>()) 1318 .getPointer(CGF)}; 1319 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 1320 CGM.getModule(), OMPRTL___kmpc_omp_task), 1321 TaskArgs); 1322 }; 1323 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1324 UntiedCodeGen); 1325 CodeGen.setAction(Action); 1326 assert(!ThreadIDVar->getType()->isPointerType() && 1327 "thread id variable must be of type kmp_int32 for tasks"); 1328 const OpenMPDirectiveKind Region = 1329 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1330 : OMPD_task; 1331 const CapturedStmt *CS = D.getCapturedStmt(Region); 1332 bool HasCancel = false; 1333 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1334 HasCancel = TD->hasCancel(); 1335 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1336 HasCancel = TD->hasCancel(); 1337 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1338 HasCancel = TD->hasCancel(); 1339 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1340 HasCancel = TD->hasCancel(); 1341 1342 CodeGenFunction CGF(CGM, true); 1343 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1344 InnermostKind, HasCancel, Action); 1345 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1346 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1347 if (!Tied) 1348 NumberOfParts = Action.getNumberOfParts(); 1349 return Res; 1350 } 1351 1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1353 const 
RecordDecl *RD, const CGRecordLayout &RL, 1354 ArrayRef<llvm::Constant *> Data) { 1355 llvm::StructType *StructTy = RL.getLLVMType(); 1356 unsigned PrevIdx = 0; 1357 ConstantInitBuilder CIBuilder(CGM); 1358 auto DI = Data.begin(); 1359 for (const FieldDecl *FD : RD->fields()) { 1360 unsigned Idx = RL.getLLVMFieldNo(FD); 1361 // Fill the alignment. 1362 for (unsigned I = PrevIdx; I < Idx; ++I) 1363 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1364 PrevIdx = Idx + 1; 1365 Fields.add(*DI); 1366 ++DI; 1367 } 1368 } 1369 1370 template <class... As> 1371 static llvm::GlobalVariable * 1372 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1373 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1374 As &&... Args) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantInitBuilder CIBuilder(CGM); 1378 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1379 buildStructValue(Fields, CGM, RD, RL, Data); 1380 return Fields.finishAndCreateGlobal( 1381 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1382 std::forward<As>(Args)...); 1383 } 1384 1385 template <typename T> 1386 static void 1387 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1388 ArrayRef<llvm::Constant *> Data, 1389 T &Parent) { 1390 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1391 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1392 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1393 buildStructValue(Fields, CGM, RD, RL, Data); 1394 Fields.finishAndAddTo(Parent); 1395 } 1396 1397 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1398 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1399 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1400 FlagsTy FlagsKey(Flags, Reserved2Flags); 1401 llvm::Value *Entry = 
                        OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

// Creates the per-function "service" insert point (a dead bitcast of undef)
// after which runtime setup such as thread-id loads is emitted: either at the
// current builder position or right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

// Removes the service insert point marker instruction, if one was created
// for the current function.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

// Returns an ident_t* describing the source location \p Loc for a runtime
// call. Without debug info (or with an invalid location) this is the shared
// default global; otherwise a per-function ident_t temporary is filled in
// with a ";file;function;line;column;;" psource string.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t contents once, at the service insert point, so
    // only the psource field needs updating per call site.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

// Returns the global thread id for the current function, cached per function.
// Inside an outlined region the id is loaded from the thread-id parameter;
// otherwise __kmpc_global_thread_num is called at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when the load cannot be skipped by an
      // exception edge: either no landing pads/exceptions, or the pointer is
      // produced in the entry block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point so the id is available
  // throughout the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

// Drops all per-function caches (thread id, service insert point,
// user-defined reductions/mappers, lastprivate conditional types) when code
// generation for \p CGF's function finishes.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
}

// Returns the ident_t* LLVM type used by all libomp entry points.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

// Returns (building lazily) the pointer-to-microtask type:
// void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...).
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Returns the __kmpc_for_static_init_{4,4u,8,8u} entry matching the loop
// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_init_{4,4u,8,8u} entry matching the loop
// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} entry matching the loop
// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_next_{4,4u,8,8u} entry matching the loop
// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
      CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the "..._decl_tgt_ref_ptr" reference pointer created
/// for a declare-target 'link' variable (or a 'to' variable under unified
/// shared memory); Address::invalid() when no such pointer applies.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No reference pointers are emitted in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables with the unique file ID of
        // their definition location.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the reference pointer is initialized with the address of
      // the original variable; on the device it is left for the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Get (creating it on first use) the internal cache variable that is passed
/// to __kmpc_threadprivate_cached for the threadprivate variable \p VD.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the current thread's copy of the threadprivate
/// variable \p VD via __kmpc_threadprivate_cached; with real TLS support the
/// original address is returned unchanged.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit the runtime calls that register the constructor, copy-constructor and
/// destructor helpers for a threadprivate variable.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit (at most once per variable, tracked in ThreadPrivateWithDefinition)
/// the ctor/dtor helper functions for the threadprivate variable \p VD and
/// register them with the runtime. When \p CGF is null a dedicated
/// "__omp_threadprivate_init_"-prefixed function holding the registration is
/// created and returned; otherwise the registration is emitted into \p CGF
/// and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is lowered as a real TLS variable.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor helper returns the same address it was passed.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active CodeGenFunction: wrap the registration in its own global
      // init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit, for a declare-target variable that needs dynamic initialization or
/// destruction, the ctor/dtor helpers and the offload entries that describe
/// them; returns whether compiling for the device.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism, not here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the helpers only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host a private byte global stands in for the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of an "artificial" threadprivate variable identified by
/// \p Name: a TLS global when the target supports TLS, otherwise a per-thread
/// copy obtained via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else over \p Cond with \p ThenGen and \p ElseGen as the arms,
/// constant-folding the condition when possible so that only the live arm is
/// emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the call sequence for a 'parallel' directive: __kmpc_fork_call with
/// \p OutlinedFn, or — when \p IfCond may evaluate to false — a serialized
/// fallback that runs the outlined function on the encountering thread
/// between __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
2186 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2187 SourceLocation Loc) { 2188 if (auto *OMPRegionInfo = 2189 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2190 if (OMPRegionInfo->getThreadIDVariable()) 2191 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2192 2193 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2194 QualType Int32Ty = 2195 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2196 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2197 CGF.EmitStoreOfScalar(ThreadID, 2198 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2199 2200 return ThreadIDTemp; 2201 } 2202 2203 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2204 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2205 SmallString<256> Buffer; 2206 llvm::raw_svector_ostream Out(Buffer); 2207 Out << Name; 2208 StringRef RuntimeName = Out.str(); 2209 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2210 if (Elem.second) { 2211 assert(Elem.second->getType()->getPointerElementType() == Ty && 2212 "OMP internal variable has different type than requested"); 2213 return &*Elem.second; 2214 } 2215 2216 return Elem.second = new llvm::GlobalVariable( 2217 CGM.getModule(), Ty, /*IsConstant*/ false, 2218 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2219 Elem.first(), /*InsertBefore=*/nullptr, 2220 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2221 } 2222 2223 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2224 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2225 std::string Name = getName({Prefix, "var"}); 2226 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2227 } 2228 2229 namespace { 2230 /// Common pre(post)-action for different OpenMP constructs. 
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // When true, the region body is only emitted under a branch taken when the
  // enter call returns non-zero.
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional region opened by Enter(); callers use this only
  /// for the Conditional case.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region bracketed by __kmpc_critical[_with_hint] and
/// __kmpc_end_critical using the named lock for \p CriticalName.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is an extra trailing argument of the _with_hint entry only.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body runs only when __kmpc_master returns
/// non-zero, and is then closed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield' call, via the OpenMPIRBuilder when one is configured,
/// otherwise as a direct __kmpc_omp_taskyield runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup and
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the internal copyprivate copy helper: for each copyprivate variable
/// it copies from the source thread's pointer array (RHS) to the destination
/// thread's (LHS) using the provided per-variable assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region plus, when copyprivate clauses are present, the
/// bookkeeping (the 'did_it' flag and the copy helper) needed for the
/// __kmpc_copyprivate call.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2462 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2463 CGM.getModule(), OMPRTL___kmpc_single), 2464 Args, 2465 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_end_single), 2467 Args, 2468 /*Conditional=*/true); 2469 SingleOpGen.setAction(Action); 2470 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2471 if (DidIt.isValid()) { 2472 // did_it = 1; 2473 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2474 } 2475 Action.Done(CGF); 2476 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2477 // <copy_func>, did_it); 2478 if (DidIt.isValid()) { 2479 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2480 QualType CopyprivateArrayTy = C.getConstantArrayType( 2481 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2482 /*IndexTypeQuals=*/0); 2483 // Create a list of all private variables for copyprivate. 2484 Address CopyprivateList = 2485 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2486 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2487 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2488 CGF.Builder.CreateStore( 2489 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2490 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2491 CGF.VoidPtrTy), 2492 Elem); 2493 } 2494 // Build function that copies private values from single region to all other 2495 // threads in the corresponding parallel region. 
    // NOTE(review): SrcExprs/DstExprs here are passed into the callee's
    // DestExprs/SrcExprs parameters respectively; the pairing appears
    // deliberate but should be confirmed against emitSingleRegion's caller.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit code for the 'ordered' directive. With the 'threads' clause the body
/// is bracketed by __kmpc_ordered / __kmpc_end_ordered; otherwise it is
/// emitted inline with no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map the directive kind that triggered a barrier to the ident_t flags the
/// runtime uses to classify the barrier (implicit for/sections/single,
/// explicit, or generic implicit).
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for a loop directive: doacross loops (an
/// 'ordered' clause with a loop count) force schedule(static, 1); otherwise
/// the outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier: either via the OpenMPIRBuilder (when enabled), or as a
/// __kmpc_cancel_barrier (inside cancellable regions, with an optional
/// cancellation-check branch) or a plain __kmpc_barrier call.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true iff the schedule clause maps to plain (non-chunked,
/// non-ordered) static scheduling.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Same check for dist_schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true iff the schedule clause maps to chunked static scheduling.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Same check for dist_schedule.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true iff the schedule kind requires the dynamic dispatch codegen
/// path (anything other than plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
/// schedule value. May also rewrite the schedule itself (simd modifier turns
/// static_chunked into static_balanced_chunked).
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    // Default to nonmonotonic for all non-static, non-ordered schedules
    // (OpenMP 5.0 semantics; see the comment above).
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  // The modifier bits are OR'ed into the schedule enumeration value.
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop (dynamic/guided/runtime/auto, or any ordered schedule).
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static non-ordered schedules use emitForStaticInit instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper for emitForStaticInit/emitDistributeStaticInit: assemble the
/// argument list and emit the __kmpc_for_static_init_* call.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit static initialization for a worksharing loop or sections construct.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit static initialization for a 'distribute' construct; dist_schedule has
/// no monotonicity modifiers, so both are passed as 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the __kmpc_for_static_fini call that ends a statically scheduled
/// worksharing/distribute construct.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

/// Notify the runtime that one iteration of an ordered dynamic loop finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit __kmpc_dispatch_next_* and convert its i32 result to a bool that is
/// true while more chunks remain.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit __kmpc_push_num_threads for a 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads,
                                CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit __kmpc_push_proc_bind for a 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a 'flush' directive, via the OpenMPIRBuilder when enabled or as a
/// __kmpc_flush runtime call otherwise. The variable list argument is unused.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if no target-region or device-global-var entries are recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register a target region entry. On the device an entry initialized from
/// the host metadata must already exist (otherwise an error is reported); on
/// the host a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an initialized-but-not-yet-registered entry exists for the
/// given (device, file, parent function, line) key.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry (device compilation only).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register a device global variable entry, filling in address/size/linkage.
/// A zero VarSize marks an entry without a definition; its size and linkage
/// may be filled in later without resetting the address.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create a __tgt_offload_entry descriptor global (ID, name, size, flags)
/// for the offload registration table.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the
        // SourceManager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit the actual
  // __tgt_offload_entry globals, diagnosing inconsistent entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  // Only meaningful when compiling for the device with a host IR file.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a temporary context just to read metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand is one offload entry; decode it by its kind (operand 0).
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds the type of the kmp_routine_entry_t task entry callback and
/// caches it in KmpRoutineEntryPtrQTy/KmpRoutineEntryPtrTy.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Lazily builds (and caches) the record type describing one offload entry,
/// matching the layout the offload runtime expects.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;     // Pointer to the offload entry info.
  //                        // (function or global)
  //   char      *name;     // Name of the function or global.
  //   size_t     size;     // Size of the entry info (0 if it is a function).
  //   int32_t    flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Packed so the layout matches the runtime's struct exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Helper tying together the original captured variable, its private copy,
/// and (for firstprivate) the element used to initialize the copy.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Builds an implicit record containing a field per private variable of a
/// task, propagating any 'aligned' attributes; returns null when there are
/// no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t.
    // {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Copy 'aligned' attributes so the private copy keeps the alignment
        // of the original variable.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the implicit kmp_task_t record matching the runtime's task
/// descriptor layout; taskloop directives get additional bound/stride fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Wraps kmp_task_t together with the (optional) privates record so the
/// runtime descriptor and the task's private copies are allocated together.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The runtime calls the entry with (gtid, kmp_task_t *).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is optional; pass a null pointer when absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb/ub/st/liter/reductions.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits a function that runs the destructors of all destructible fields in
/// the task's privates record; called by the runtime before freeing the task.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each private VarDecl to its position in Args (slot 0 is the
  // privates-record pointer, so positions start at 1).
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This is a trivial forwarding function; force inlining when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only non-trivial constructor inits are
    // re-run; trivial firstprivate data was already copied into the task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value out of the source task's shareds area.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize the init element to point
          // at the shared value, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only non-trivial constructor initializers force a task_dup function.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature matches the runtime's task_dup callback:
  // (task_dst, task_src, lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate copies read from the *source* task's shareds area.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations: ContDests[I] is the loop-header block of
  // iterator I, ExitDests[I] its exit block. Filled by the constructor and
  // consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // Emits the loop *headers* for each iterator in E: privatized storage for
  // the iterator variable and its counter, counter zero-init, the bounds
  // check, and the iterator-update in the body. The matching loop latches and
  // exit blocks are emitted by the destructor, so code generated while this
  // scope is alive lands inside the (possibly nested) iterator loops.
  // A null E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the base address and the size in bytes of the storage designated
/// by the depend/affinity list item \p E. Array-shaping expressions multiply
/// the element size by every dimension; array sections compute
/// (upper-bound-address + 1) - lower-bound-address; otherwise the size is
/// simply sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // One past the section's last element minus the section's base address.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the implicit kmp_task_affinity_info_t record, if it is not built
/// yet: { intptr_t base_addr; size_t len; uint32_t flags; }.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

// Emits the whole task-creation sequence for a task/taskloop/target
// directive: builds the task record type, allocates the task via the runtime,
// copies shareds, initializes privates, and wires up destructors/priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Collect private, firstprivate and lastprivate copies, each paired with
  // its declared alignment so the privates record can be laid out by
  // decreasing alignment below.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort keeps source order among equally-aligned privates.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // distinct (cached) record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map type is taken from the 4th parameter of the outlined
  // task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (select on the final-clause
  // condition) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts plain items (compile-time); NumOfElements is the
    // runtime count contributed by iterator modifiers, if any.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA of kmp_task_affinity_info_t.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized: a constant array temp is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-driven items need a runtime counter, continuing after the
      // Pos plain items already written.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-dup callback when lastprivates are
    // present or any private needs (re)initialization on duplication.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // source/sink/depobj/unknown never reach the runtime translation.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// Layout: { intptr_t base_addr; size_t len; <bool-width uint> flags; }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

// Returns (number of elements, lvalue of the first kmp_depend_info element)
// for a depobj. The element count is stashed by the runtime layout in the
// base_addr field of the record *preceding* the array (hence the GEP by -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

// Fills kmp_depend_info entries of DependenciesArray for one depend clause.
// Pos is either a compile-time index (unsigned*) advanced in place, or a
// runtime counter lvalue (LValue*) when the count is only known at runtime
// (iterator modifiers).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wraps the stores below in iterator loops if an iterator modifier exists.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ?
               Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: static index in place, or runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

// Returns, for each depobj expression in the clause, a runtime value holding
// the number of kmp_depend_info elements stored in that depobj.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Accumulate sizes inside the iterator loops (if any); the temporaries
    // are read back after the loops close, outside this scope.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the base_addr field of the record just
      // before the depobj array (GEP by -1).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

// Copies the kmp_depend_info entries stored in each depobj of the clause into
// DependenciesArray at the runtime position PosLVal, advancing PosLVal by the
// number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // Count is stored in base_addr of the record preceding the array.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

// Emits the combined kmp_depend_info array for all depend clauses of a task
// and returns (number of elements, array address). Returns a null pair when
// every clause has an empty expression list.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of plain (non-depobj, non-iterator) dependencies.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total count only known at runtime: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: constant array temp.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First: plain dependencies (static positions).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators (runtime counter continues
  // after the static ones).
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
4904 if (HasDepobjDeps) { 4905 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4906 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4907 continue; 4908 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4909 DependenciesArray); 4910 } 4911 } 4912 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4913 DependenciesArray, CGF.VoidPtrTy); 4914 return std::make_pair(NumOfElements, DependenciesArray); 4915 } 4916 4917 Address CGOpenMPRuntime::emitDepobjDependClause( 4918 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4919 SourceLocation Loc) { 4920 if (Dependencies.DepExprs.empty()) 4921 return Address::invalid(); 4922 // Process list of dependencies. 4923 ASTContext &C = CGM.getContext(); 4924 Address DependenciesArray = Address::invalid(); 4925 unsigned NumDependencies = Dependencies.DepExprs.size(); 4926 QualType FlagsTy; 4927 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4928 RecordDecl *KmpDependInfoRD = 4929 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4930 4931 llvm::Value *Size; 4932 // Define type kmp_depend_info[<Dependencies.size()>]; 4933 // For depobj reserve one extra element to store the number of elements. 4934 // It is required to handle depobj(x) update(in) construct. 
4935 // kmp_depend_info[<Dependencies.size()>] deps; 4936 llvm::Value *NumDepsVal; 4937 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4938 if (const auto *IE = 4939 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4940 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4941 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4942 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4943 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4944 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4945 } 4946 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4947 NumDepsVal); 4948 CharUnits SizeInBytes = 4949 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4950 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4951 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4952 NumDepsVal = 4953 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4954 } else { 4955 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4956 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4957 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4958 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4959 Size = CGM.getSize(Sz.alignTo(Align)); 4960 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4961 } 4962 // Need to allocate on the dynamic memory. 4963 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4964 // Use default allocator. 
4965 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4966 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4967 4968 llvm::Value *Addr = 4969 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 4970 CGM.getModule(), OMPRTL___kmpc_alloc), 4971 Args, ".dep.arr.addr"); 4972 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4973 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4974 DependenciesArray = Address(Addr, Align); 4975 // Write number of elements in the first element of array for depobj. 4976 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4977 // deps[i].base_addr = NumDependencies; 4978 LValue BaseAddrLVal = CGF.EmitLValueForField( 4979 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4980 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4981 llvm::PointerUnion<unsigned *, LValue *> Pos; 4982 unsigned Idx = 1; 4983 LValue PosLVal; 4984 if (Dependencies.IteratorExpr) { 4985 PosLVal = CGF.MakeAddrLValue( 4986 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4987 C.getSizeType()); 4988 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4989 /*IsInit=*/true); 4990 Pos = &PosLVal; 4991 } else { 4992 Pos = &Idx; 4993 } 4994 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4995 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4996 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4997 return DependenciesArray; 4998 } 4999 5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5001 SourceLocation Loc) { 5002 ASTContext &C = CGM.getContext(); 5003 QualType FlagsTy; 5004 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5005 LValue Base = CGF.EmitLoadOfPointerLValue( 5006 DepobjLVal.getAddress(CGF), 5007 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5008 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5009 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5010 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5011 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5012 Addr.getPointer(), 5013 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5014 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5015 CGF.VoidPtrTy); 5016 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5017 // Use default allocator. 5018 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5019 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5020 5021 // _kmpc_free(gtid, addr, nullptr); 5022 (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5023 CGM.getModule(), OMPRTL___kmpc_free), 5024 Args); 5025 } 5026 5027 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5028 OpenMPDependClauseKind NewDepKind, 5029 SourceLocation Loc) { 5030 ASTContext &C = CGM.getContext(); 5031 QualType FlagsTy; 5032 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5033 RecordDecl *KmpDependInfoRD = 5034 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5035 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5036 llvm::Value *NumDeps; 5037 LValue Base; 5038 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5039 5040 Address Begin = Base.getAddress(CGF); 5041 // Cast from pointer to array type to pointer to single element. 5042 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5043 // The basic structure here is a while-do loop. 
5044 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5045 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5046 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5047 CGF.EmitBlock(BodyBB); 5048 llvm::PHINode *ElementPHI = 5049 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5050 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5051 Begin = Address(ElementPHI, Begin.getAlignment()); 5052 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5053 Base.getTBAAInfo()); 5054 // deps[i].flags = NewDepKind; 5055 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5056 LValue FlagsLVal = CGF.EmitLValueForField( 5057 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5058 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5059 FlagsLVal); 5060 5061 // Shift the address forward by one element. 5062 Address ElementNext = 5063 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5064 ElementPHI->addIncoming(ElementNext.getPointer(), 5065 CGF.Builder.GetInsertBlock()); 5066 llvm::Value *IsEmpty = 5067 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5068 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5069 // Done. 
5070 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5071 } 5072 5073 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5074 const OMPExecutableDirective &D, 5075 llvm::Function *TaskFunction, 5076 QualType SharedsTy, Address Shareds, 5077 const Expr *IfCond, 5078 const OMPTaskDataTy &Data) { 5079 if (!CGF.HaveInsertPoint()) 5080 return; 5081 5082 TaskResultTy Result = 5083 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5084 llvm::Value *NewTask = Result.NewTask; 5085 llvm::Function *TaskEntry = Result.TaskEntry; 5086 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5087 LValue TDBase = Result.TDBase; 5088 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5089 // Process list of dependences. 5090 Address DependenciesArray = Address::invalid(); 5091 llvm::Value *NumOfElements; 5092 std::tie(NumOfElements, DependenciesArray) = 5093 emitDependClause(CGF, Data.Dependences, Loc); 5094 5095 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5096 // libcall. 
5097 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5098 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5099 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5100 // list is not empty 5101 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5102 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5103 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5104 llvm::Value *DepTaskArgs[7]; 5105 if (!Data.Dependences.empty()) { 5106 DepTaskArgs[0] = UpLoc; 5107 DepTaskArgs[1] = ThreadID; 5108 DepTaskArgs[2] = NewTask; 5109 DepTaskArgs[3] = NumOfElements; 5110 DepTaskArgs[4] = DependenciesArray.getPointer(); 5111 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5112 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5113 } 5114 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5115 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5116 if (!Data.Tied) { 5117 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5118 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5119 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5120 } 5121 if (!Data.Dependences.empty()) { 5122 CGF.EmitRuntimeCall( 5123 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5124 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5125 DepTaskArgs); 5126 } else { 5127 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5128 CGM.getModule(), OMPRTL___kmpc_omp_task), 5129 TaskArgs); 5130 } 5131 // Check if parent region is untied and build return for untied task; 5132 if (auto *Region = 5133 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5134 Region->emitUntiedSwitch(CGF); 5135 }; 5136 5137 llvm::Value *DepWaitTaskArgs[6]; 5138 if (!Data.Dependences.empty()) { 5139 DepWaitTaskArgs[0] = UpLoc; 5140 DepWaitTaskArgs[1] = ThreadID; 5141 DepWaitTaskArgs[2] = NumOfElements; 5142 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 
5143 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5144 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5145 } 5146 auto &M = CGM.getModule(); 5147 auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5148 &Data, &DepWaitTaskArgs, 5149 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5150 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5151 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5152 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5153 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5154 // is specified. 5155 if (!Data.Dependences.empty()) 5156 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5157 M, OMPRTL___kmpc_omp_wait_deps), 5158 DepWaitTaskArgs); 5159 // Call proxy_task_entry(gtid, new_task); 5160 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5161 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5162 Action.Enter(CGF); 5163 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5164 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5165 OutlinedFnArgs); 5166 }; 5167 5168 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5169 // kmp_task_t *new_task); 5170 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5171 // kmp_task_t *new_task); 5172 RegionCodeGenTy RCG(CodeGen); 5173 CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5174 M, OMPRTL___kmpc_omp_task_begin_if0), 5175 TaskArgs, 5176 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5177 M, OMPRTL___kmpc_omp_task_complete_if0), 5178 TaskArgs); 5179 RCG.setAction(Action); 5180 RCG(CGF); 5181 }; 5182 5183 if (IfCond) { 5184 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5185 } else { 5186 RegionCodeGenTy ThenRCG(ThenCodeGen); 5187 ThenRCG(CGF); 5188 } 5189 } 5190 5191 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5192 const 
OMPLoopDirective &D, 5193 llvm::Function *TaskFunction, 5194 QualType SharedsTy, Address Shareds, 5195 const Expr *IfCond, 5196 const OMPTaskDataTy &Data) { 5197 if (!CGF.HaveInsertPoint()) 5198 return; 5199 TaskResultTy Result = 5200 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5201 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5202 // libcall. 5203 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5204 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5205 // sched, kmp_uint64 grainsize, void *task_dup); 5206 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5207 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5208 llvm::Value *IfVal; 5209 if (IfCond) { 5210 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5211 /*isSigned=*/true); 5212 } else { 5213 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5214 } 5215 5216 LValue LBLVal = CGF.EmitLValueForField( 5217 Result.TDBase, 5218 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5219 const auto *LBVar = 5220 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5221 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5222 LBLVal.getQuals(), 5223 /*IsInitializer=*/true); 5224 LValue UBLVal = CGF.EmitLValueForField( 5225 Result.TDBase, 5226 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5227 const auto *UBVar = 5228 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5229 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5230 UBLVal.getQuals(), 5231 /*IsInitializer=*/true); 5232 LValue StLVal = CGF.EmitLValueForField( 5233 Result.TDBase, 5234 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5235 const auto *StVar = 5236 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5237 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 
5238 StLVal.getQuals(), 5239 /*IsInitializer=*/true); 5240 // Store reductions address. 5241 LValue RedLVal = CGF.EmitLValueForField( 5242 Result.TDBase, 5243 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5244 if (Data.Reductions) { 5245 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5246 } else { 5247 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5248 CGF.getContext().VoidPtrTy); 5249 } 5250 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5251 llvm::Value *TaskArgs[] = { 5252 UpLoc, 5253 ThreadID, 5254 Result.NewTask, 5255 IfVal, 5256 LBLVal.getPointer(CGF), 5257 UBLVal.getPointer(CGF), 5258 CGF.EmitLoadOfScalar(StLVal, Loc), 5259 llvm::ConstantInt::getSigned( 5260 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5261 llvm::ConstantInt::getSigned( 5262 CGF.IntTy, Data.Schedule.getPointer() 5263 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5264 : NoSchedule), 5265 Data.Schedule.getPointer() 5266 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5267 /*isSigned=*/false) 5268 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5269 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5270 Result.TaskDupFn, CGF.VoidPtrTy) 5271 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5272 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5273 CGM.getModule(), OMPRTL___kmpc_taskloop), 5274 TaskArgs); 5275 } 5276 5277 /// Emit reduction operation for each element of array (required for 5278 /// array sections) LHS op = RHS. 5279 /// \param Type Type of array. 5280 /// \param LHSVar Variable on the left side of the reduction operation 5281 /// (references element of array in original variable). 5282 /// \param RHSVar Variable on the right side of the reduction operation 5283 /// (references element of array in original variable). 5284 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5285 /// RHSVar. 
5286 static void EmitOMPAggregateReduction( 5287 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5288 const VarDecl *RHSVar, 5289 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5290 const Expr *, const Expr *)> &RedOpGen, 5291 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5292 const Expr *UpExpr = nullptr) { 5293 // Perform element-by-element initialization. 5294 QualType ElementTy; 5295 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5296 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5297 5298 // Drill down to the base element type on both arrays. 5299 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5300 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5301 5302 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5303 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5304 // Cast from pointer to array type to pointer to single element. 5305 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5306 // The basic structure here is a while-do loop. 5307 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5308 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5309 llvm::Value *IsEmpty = 5310 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5311 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5312 5313 // Enter the loop body, making that address the current address. 
5314 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5315 CGF.EmitBlock(BodyBB); 5316 5317 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5318 5319 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5320 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5321 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5322 Address RHSElementCurrent = 5323 Address(RHSElementPHI, 5324 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5325 5326 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5327 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5328 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5329 Address LHSElementCurrent = 5330 Address(LHSElementPHI, 5331 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5332 5333 // Emit copy. 5334 CodeGenFunction::OMPPrivateScope Scope(CGF); 5335 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5336 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5337 Scope.Privatize(); 5338 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5339 Scope.ForceCleanup(); 5340 5341 // Shift the address forward by one element. 5342 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5343 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5344 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5345 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5346 // Check whether we've reached the end. 5347 llvm::Value *Done = 5348 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5349 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5350 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5351 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5352 5353 // Done. 5354 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5355 } 5356 5357 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5359 /// UDR combiner function. 5360 static void emitReductionCombiner(CodeGenFunction &CGF, 5361 const Expr *ReductionOp) { 5362 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5363 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5364 if (const auto *DRE = 5365 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5366 if (const auto *DRD = 5367 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5368 std::pair<llvm::Function *, llvm::Function *> Reduction = 5369 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5370 RValue Func = RValue::get(Reduction.first); 5371 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5372 CGF.EmitIgnoredExpr(ReductionOp); 5373 return; 5374 } 5375 CGF.EmitIgnoredExpr(ReductionOp); 5376 } 5377 5378 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5379 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5380 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5381 ArrayRef<const Expr *> ReductionOps) { 5382 ASTContext &C = CGM.getContext(); 5383 5384 // void reduction_func(void *LHSArg, void *RHSArg); 5385 FunctionArgList Args; 5386 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5387 ImplicitParamDecl::Other); 5388 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5389 ImplicitParamDecl::Other); 5390 Args.push_back(&LHSArg); 5391 Args.push_back(&RHSArg); 5392 const auto &CGFI = 5393 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5394 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5396 llvm::GlobalValue::InternalLinkage, Name, 5397 &CGM.getModule()); 5398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5399 Fn->setDoesNotRecurse(); 
5400 CodeGenFunction CGF(CGM); 5401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5402 5403 // Dst = (void*[n])(LHSArg); 5404 // Src = (void*[n])(RHSArg); 5405 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5406 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5407 ArgsType), CGF.getPointerAlign()); 5408 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5410 ArgsType), CGF.getPointerAlign()); 5411 5412 // ... 5413 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5414 // ... 5415 CodeGenFunction::OMPPrivateScope Scope(CGF); 5416 auto IPriv = Privates.begin(); 5417 unsigned Idx = 0; 5418 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5419 const auto *RHSVar = 5420 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5421 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5422 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5423 }); 5424 const auto *LHSVar = 5425 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5426 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5427 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5428 }); 5429 QualType PrivTy = (*IPriv)->getType(); 5430 if (PrivTy->isVariablyModifiedType()) { 5431 // Get array size and emit VLA type. 
5432 ++Idx; 5433 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5434 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5435 const VariableArrayType *VLA = 5436 CGF.getContext().getAsVariableArrayType(PrivTy); 5437 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5438 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5439 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5440 CGF.EmitVariablyModifiedType(PrivTy); 5441 } 5442 } 5443 Scope.Privatize(); 5444 IPriv = Privates.begin(); 5445 auto ILHS = LHSExprs.begin(); 5446 auto IRHS = RHSExprs.begin(); 5447 for (const Expr *E : ReductionOps) { 5448 if ((*IPriv)->getType()->isArrayType()) { 5449 // Emit reduction for array section. 5450 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5451 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5452 EmitOMPAggregateReduction( 5453 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5454 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5455 emitReductionCombiner(CGF, E); 5456 }); 5457 } else { 5458 // Emit reduction for array subscript or single variable. 5459 emitReductionCombiner(CGF, E); 5460 } 5461 ++IPriv; 5462 ++ILHS; 5463 ++IRHS; 5464 } 5465 Scope.ForceCleanup(); 5466 CGF.FinishFunction(); 5467 return Fn; 5468 } 5469 5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5471 const Expr *ReductionOp, 5472 const Expr *PrivateRef, 5473 const DeclRefExpr *LHS, 5474 const DeclRefExpr *RHS) { 5475 if (PrivateRef->getType()->isArrayType()) { 5476 // Emit reduction for array section. 
5477 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5478 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5479 EmitOMPAggregateReduction( 5480 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5481 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5482 emitReductionCombiner(CGF, ReductionOp); 5483 }); 5484 } else { 5485 // Emit reduction for array subscript or single variable. 5486 emitReductionCombiner(CGF, ReductionOp); 5487 } 5488 } 5489 5490 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5491 ArrayRef<const Expr *> Privates, 5492 ArrayRef<const Expr *> LHSExprs, 5493 ArrayRef<const Expr *> RHSExprs, 5494 ArrayRef<const Expr *> ReductionOps, 5495 ReductionOptionsTy Options) { 5496 if (!CGF.HaveInsertPoint()) 5497 return; 5498 5499 bool WithNowait = Options.WithNowait; 5500 bool SimpleReduction = Options.SimpleReduction; 5501 5502 // Next code should be emitted for reduction: 5503 // 5504 // static kmp_critical_name lock = { 0 }; 5505 // 5506 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5507 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5508 // ... 5509 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5510 // *(Type<n>-1*)rhs[<n>-1]); 5511 // } 5512 // 5513 // ... 5514 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5515 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5516 // RedList, reduce_func, &<lock>)) { 5517 // case 1: 5518 // ... 5519 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5520 // ... 5521 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5522 // break; 5523 // case 2: 5524 // ... 5525 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5526 // ... 5527 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5528 // break; 5529 // default:; 5530 // } 5531 // 5532 // if SimpleReduction is true, only the next code is generated: 5533 // ... 
5534 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5535 // ... 5536 5537 ASTContext &C = CGM.getContext(); 5538 5539 if (SimpleReduction) { 5540 CodeGenFunction::RunCleanupsScope Scope(CGF); 5541 auto IPriv = Privates.begin(); 5542 auto ILHS = LHSExprs.begin(); 5543 auto IRHS = RHSExprs.begin(); 5544 for (const Expr *E : ReductionOps) { 5545 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5546 cast<DeclRefExpr>(*IRHS)); 5547 ++IPriv; 5548 ++ILHS; 5549 ++IRHS; 5550 } 5551 return; 5552 } 5553 5554 // 1. Build a list of reduction variables. 5555 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5556 auto Size = RHSExprs.size(); 5557 for (const Expr *E : Privates) { 5558 if (E->getType()->isVariablyModifiedType()) 5559 // Reserve place for array size. 5560 ++Size; 5561 } 5562 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5563 QualType ReductionArrayTy = 5564 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5565 /*IndexTypeQuals=*/0); 5566 Address ReductionList = 5567 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5568 auto IPriv = Privates.begin(); 5569 unsigned Idx = 0; 5570 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5571 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5572 CGF.Builder.CreateStore( 5573 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5574 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5575 Elem); 5576 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5577 // Store array size. 5578 ++Idx; 5579 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5580 llvm::Value *Size = CGF.Builder.CreateIntCast( 5581 CGF.getVLASize( 5582 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5583 .NumElts, 5584 CGF.SizeTy, /*isSigned=*/false); 5585 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5586 Elem); 5587 } 5588 } 5589 5590 // 2. 
Emit reduce_func(). 5591 llvm::Function *ReductionFn = emitReductionFunction( 5592 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5593 LHSExprs, RHSExprs, ReductionOps); 5594 5595 // 3. Create static kmp_critical_name lock = { 0 }; 5596 std::string Name = getName({"reduction"}); 5597 llvm::Value *Lock = getCriticalRegionLock(Name); 5598 5599 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5600 // RedList, reduce_func, &<lock>); 5601 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5602 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5603 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5604 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5605 ReductionList.getPointer(), CGF.VoidPtrTy); 5606 llvm::Value *Args[] = { 5607 IdentTLoc, // ident_t *<loc> 5608 ThreadId, // i32 <gtid> 5609 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5610 ReductionArrayTySize, // size_type sizeof(RedList) 5611 RL, // void *RedList 5612 ReductionFn, // void (*) (void *, void *) <reduce_func> 5613 Lock // kmp_critical_name *&<lock> 5614 }; 5615 llvm::Value *Res = CGF.EmitRuntimeCall( 5616 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5617 CGM.getModule(), 5618 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5619 Args); 5620 5621 // 5. Build switch(res) 5622 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5623 llvm::SwitchInst *SwInst = 5624 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5625 5626 // 6. Build case 1: 5627 // ... 5628 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5629 // ... 
5630 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5631 // break; 5632 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5633 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5634 CGF.EmitBlock(Case1BB); 5635 5636 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5637 llvm::Value *EndArgs[] = { 5638 IdentTLoc, // ident_t *<loc> 5639 ThreadId, // i32 <gtid> 5640 Lock // kmp_critical_name *&<lock> 5641 }; 5642 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5643 CodeGenFunction &CGF, PrePostActionTy &Action) { 5644 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5645 auto IPriv = Privates.begin(); 5646 auto ILHS = LHSExprs.begin(); 5647 auto IRHS = RHSExprs.begin(); 5648 for (const Expr *E : ReductionOps) { 5649 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5650 cast<DeclRefExpr>(*IRHS)); 5651 ++IPriv; 5652 ++ILHS; 5653 ++IRHS; 5654 } 5655 }; 5656 RegionCodeGenTy RCG(CodeGen); 5657 CommonActionTy Action( 5658 nullptr, llvm::None, 5659 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5660 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5661 : OMPRTL___kmpc_end_reduce), 5662 EndArgs); 5663 RCG.setAction(Action); 5664 RCG(CGF); 5665 5666 CGF.EmitBranch(DefaultBB); 5667 5668 // 7. Build case 2: 5669 // ... 5670 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5671 // ... 
5672 // break; 5673 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5674 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5675 CGF.EmitBlock(Case2BB); 5676 5677 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5678 CodeGenFunction &CGF, PrePostActionTy &Action) { 5679 auto ILHS = LHSExprs.begin(); 5680 auto IRHS = RHSExprs.begin(); 5681 auto IPriv = Privates.begin(); 5682 for (const Expr *E : ReductionOps) { 5683 const Expr *XExpr = nullptr; 5684 const Expr *EExpr = nullptr; 5685 const Expr *UpExpr = nullptr; 5686 BinaryOperatorKind BO = BO_Comma; 5687 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5688 if (BO->getOpcode() == BO_Assign) { 5689 XExpr = BO->getLHS(); 5690 UpExpr = BO->getRHS(); 5691 } 5692 } 5693 // Try to emit update expression as a simple atomic. 5694 const Expr *RHSExpr = UpExpr; 5695 if (RHSExpr) { 5696 // Analyze RHS part of the whole expression. 5697 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5698 RHSExpr->IgnoreParenImpCasts())) { 5699 // If this is a conditional operator, analyze its condition for 5700 // min/max reduction operator. 
5701 RHSExpr = ACO->getCond(); 5702 } 5703 if (const auto *BORHS = 5704 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5705 EExpr = BORHS->getRHS(); 5706 BO = BORHS->getOpcode(); 5707 } 5708 } 5709 if (XExpr) { 5710 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5711 auto &&AtomicRedGen = [BO, VD, 5712 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5713 const Expr *EExpr, const Expr *UpExpr) { 5714 LValue X = CGF.EmitLValue(XExpr); 5715 RValue E; 5716 if (EExpr) 5717 E = CGF.EmitAnyExpr(EExpr); 5718 CGF.EmitOMPAtomicSimpleUpdateExpr( 5719 X, E, BO, /*IsXLHSInRHSPart=*/true, 5720 llvm::AtomicOrdering::Monotonic, Loc, 5721 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5722 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5723 PrivateScope.addPrivate( 5724 VD, [&CGF, VD, XRValue, Loc]() { 5725 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5726 CGF.emitOMPSimpleStore( 5727 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5728 VD->getType().getNonReferenceType(), Loc); 5729 return LHSTemp; 5730 }); 5731 (void)PrivateScope.Privatize(); 5732 return CGF.EmitAnyExpr(UpExpr); 5733 }); 5734 }; 5735 if ((*IPriv)->getType()->isArrayType()) { 5736 // Emit atomic reduction for array section. 5737 const auto *RHSVar = 5738 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5739 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5740 AtomicRedGen, XExpr, EExpr, UpExpr); 5741 } else { 5742 // Emit atomic reduction for array subscript or single variable. 5743 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5744 } 5745 } else { 5746 // Emit as a critical region. 
5747 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5748 const Expr *, const Expr *) { 5749 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5750 std::string Name = RT.getName({"atomic_reduction"}); 5751 RT.emitCriticalRegion( 5752 CGF, Name, 5753 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5754 Action.Enter(CGF); 5755 emitReductionCombiner(CGF, E); 5756 }, 5757 Loc); 5758 }; 5759 if ((*IPriv)->getType()->isArrayType()) { 5760 const auto *LHSVar = 5761 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5762 const auto *RHSVar = 5763 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5764 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5765 CritRedGen); 5766 } else { 5767 CritRedGen(CGF, nullptr, nullptr, nullptr); 5768 } 5769 } 5770 ++ILHS; 5771 ++IRHS; 5772 ++IPriv; 5773 } 5774 }; 5775 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5776 if (!WithNowait) { 5777 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5778 llvm::Value *EndArgs[] = { 5779 IdentTLoc, // ident_t *<loc> 5780 ThreadId, // i32 <gtid> 5781 Lock // kmp_critical_name *&<lock> 5782 }; 5783 CommonActionTy Action(nullptr, llvm::None, 5784 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 5785 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5786 EndArgs); 5787 AtomicRCG.setAction(Action); 5788 AtomicRCG(CGF); 5789 } else { 5790 AtomicRCG(CGF); 5791 } 5792 5793 CGF.EmitBranch(DefaultBB); 5794 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5795 } 5796 5797 /// Generates unique name for artificial threadprivate variables. 5798 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  // Use the canonical declaration so redeclarations map to the same name.
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: private copy to initialize; %orig: original reduction item.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer: pass a null original item.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out accumulator; %arg1: incoming private value.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No function is emitted at all when the item needs no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags;
  //   // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // Finalizer may be null when the item needs no cleanups.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy creation by the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6165 if (Sizes.second) { 6166 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6167 /*isSigned=*/false); 6168 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6169 CGF, CGM.getContext().getSizeType(), 6170 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6171 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6172 } 6173 } 6174 6175 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6176 SourceLocation Loc, 6177 llvm::Value *ReductionsPtr, 6178 LValue SharedLVal) { 6179 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6180 // *d); 6181 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6182 CGM.IntTy, 6183 /*isSigned=*/true), 6184 ReductionsPtr, 6185 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6186 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6187 return Address( 6188 CGF.EmitRuntimeCall( 6189 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 6190 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6191 Args), 6192 SharedLVal.getAlignment()); 6193 } 6194 6195 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6196 SourceLocation Loc) { 6197 if (!CGF.HaveInsertPoint()) 6198 return; 6199 6200 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 6201 if (OMPBuilder) { 6202 OMPBuilder->CreateTaskwait(CGF.Builder); 6203 } else { 6204 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6205 // global_tid); 6206 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6207 // Ignore return result until untied tasks are supported. 
6208 CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 6209 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6210 Args); 6211 } 6212 6213 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6214 Region->emitUntiedSwitch(CGF); 6215 } 6216 6217 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6218 OpenMPDirectiveKind InnerKind, 6219 const RegionCodeGenTy &CodeGen, 6220 bool HasCancel) { 6221 if (!CGF.HaveInsertPoint()) 6222 return; 6223 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6224 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6225 } 6226 6227 namespace { 6228 enum RTCancelKind { 6229 CancelNoreq = 0, 6230 CancelParallel = 1, 6231 CancelLoop = 2, 6232 CancelSections = 3, 6233 CancelTaskgroup = 4 6234 }; 6235 } // anonymous namespace 6236 6237 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6238 RTCancelKind CancelKind = CancelNoreq; 6239 if (CancelRegion == OMPD_parallel) 6240 CancelKind = CancelParallel; 6241 else if (CancelRegion == OMPD_for) 6242 CancelKind = CancelLoop; 6243 else if (CancelRegion == OMPD_sections) 6244 CancelKind = CancelSections; 6245 else { 6246 assert(CancelRegion == OMPD_taskgroup); 6247 CancelKind = CancelTaskgroup; 6248 } 6249 return CancelKind; 6250 } 6251 6252 void CGOpenMPRuntime::emitCancellationPointCall( 6253 CodeGenFunction &CGF, SourceLocation Loc, 6254 OpenMPDirectiveKind CancelRegion) { 6255 if (!CGF.HaveInsertPoint()) 6256 return; 6257 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6258 // global_tid, kmp_int32 cncl_kind); 6259 if (auto *OMPRegionInfo = 6260 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6261 // For 'cancellation point taskgroup', the task region info may not have a 6262 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Emitted under the 'if' clause condition when one is present.
    auto &&ThenGen = [&M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result =
          CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                  M, OMPRTL___kmpc_cancel),
                              Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// Pre/post action that brackets a target region with initialization and
/// finalization of the allocators named in its 'uses_allocators' clauses.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expression, allocator-traits expression) pairs; only
  /// allocators that have a traits expression are recorded here (see
  /// emitTargetOutlinedFunction below, which filters the rest out).
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emit allocator initialization for every recorded pair on region entry.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emit allocator finalization for every recorded pair on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Outline the target region of \p D into \p OutlinedFn (with entry-point ID
/// \p OutlinedFnID), wiring uses_allocators init/fini actions around the
/// generated code before delegating to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  // Collect (allocator, traits) pairs from all 'uses_allocators' clauses.
  // Allocators without traits need no runtime initialization and are skipped.
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE: this local 'D' shadows the directive parameter 'D' above.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emit a call to __kmpc_init_allocator for \p Allocator with the traits
/// array \p AllocatorTraits, and store the resulting allocator handle into
/// the (locally emitted) allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit a call to __kmpc_destroy_allocator for the allocator handle currently
/// stored in \p Allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Generate the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, register it in the offload entries table.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is just a uniquely named constant byte.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Strip containers and ignorable statements from \p Body and return the
/// single "interesting" child statement, or nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions contribute nothing and can be skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable when every declaration
      // in it is side-effect free (types, pragmas, OpenMP decls, constexpr
      // vars, or trivially-typed vars with trivial/absent initializers).
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look at the single nested directive (if any) to
    // decide the team count.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested 'teams' with num_teams: evaluate the clause expression in
        // the captured-statement context.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // 'teams' without num_teams: 0 means "use the runtime default".
        return Bld.getInt32(0);
      }
      // Nested 'parallel'/'simd' executes with a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the thread count for the region captured by \p CS by inspecting a
/// nested 'parallel' directive's if/num_threads clauses, clamped by
/// \p DefaultThreadLimitVal when that is non-null. Returns a 32-bit value;
/// 0 means "use the runtime default".
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an unmodified or 'parallel'-modified if clause applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized region, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit when one was supplied.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested 'simd' region runs on a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.
/// Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the count from the nested directives.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry a thread_limit clause; evaluate it in
      // the captured-statement context.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause captured.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a 'teams' (non-distribute) region to its child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd 'distribute' may still contain a 'parallel' region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for an inner 'parallel'.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an unmodified or 'parallel'-modified if clause applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serialized region, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field, i.e. the number of
  /// trailing zero bits in its mask (counted by shifting right until the
  /// lowest bit is set).
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    // Dereference yields the raw pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the size in bytes to be mapped for expression \p E, handling
  /// array shaping expressions, array sections (including sections with
  /// only a lower bound), and plain types.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: element size times the
    // product of all dimension extents.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the base size: clamp negative results to 0.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
7287 if (!Length) { 7288 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7289 OASE->getBase()->IgnoreParenImpCasts()) 7290 .getCanonicalType(); 7291 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7292 return ATy->getSize().getSExtValue() != 1; 7293 // If we don't have a constant dimension length, we have to consider 7294 // the current section as having any size, so it is not necessarily 7295 // unitary. If it happen to be unity size, that's user fault. 7296 return true; 7297 } 7298 7299 // Check if the length evaluates to 1. 7300 Expr::EvalResult Result; 7301 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7302 return true; // Can have more that size 1. 7303 7304 llvm::APSInt ConstLength = Result.Val.getInt(); 7305 return ConstLength.getSExtValue() != 1; 7306 } 7307 7308 /// Generate the base pointers, section pointers, sizes and map type 7309 /// bits for the provided map type, map modifier, and expression components. 7310 /// \a IsFirstComponent should be set to true if the provided set of 7311 /// components is the first associated with a capture. 7312 void generateInfoForComponentList( 7313 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7314 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7315 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7316 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7317 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7318 bool IsImplicit, bool ForDeviceAddr = false, 7319 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7320 OverlappedElements = llvm::None) const { 7321 // The following summarizes what has to be generated for each map and the 7322 // types below. The generated information is expressed in this order: 7323 // base pointer, section pointer, size, flags 7324 // (to add to the ones that come from the map type and modifier). 
7325 // 7326 // double d; 7327 // int i[100]; 7328 // float *p; 7329 // 7330 // struct S1 { 7331 // int i; 7332 // float f[50]; 7333 // } 7334 // struct S2 { 7335 // int i; 7336 // float f[50]; 7337 // S1 s; 7338 // double *p; 7339 // struct S2 *ps; 7340 // } 7341 // S2 s; 7342 // S2 *ps; 7343 // 7344 // map(d) 7345 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7346 // 7347 // map(i) 7348 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7349 // 7350 // map(i[1:23]) 7351 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7352 // 7353 // map(p) 7354 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7355 // 7356 // map(p[1:24]) 7357 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7358 // 7359 // map(s) 7360 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7361 // 7362 // map(s.i) 7363 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7364 // 7365 // map(s.s.f) 7366 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7367 // 7368 // map(s.p) 7369 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7370 // 7371 // map(to: s.p[:22]) 7372 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7373 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7374 // &(s.p), &(s.p[0]), 22*sizeof(double), 7375 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7376 // (*) alloc space for struct members, only this is a target parameter 7377 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7378 // optimizes this entry out, same in the examples below) 7379 // (***) map the pointee (map: to) 7380 // 7381 // map(s.ps) 7382 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7383 // 7384 // map(from: s.ps->s.i) 7385 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7386 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7387 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7388 // 7389 // map(to: s.ps->ps) 7390 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7391 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7392 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7393 // 7394 // map(s.ps->ps->ps) 7395 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7396 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7397 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7398 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7399 // 7400 // map(to: s.ps->ps->s.f[:22]) 7401 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7402 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7403 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7404 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7405 // 7406 // map(ps) 7407 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7408 // 7409 // map(ps->i) 7410 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7411 // 7412 // map(ps->s.f) 7413 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7414 // 7415 // map(from: ps->p) 7416 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7417 // 7418 // map(to: ps->p[:22]) 7419 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7420 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7421 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7422 // 7423 // map(ps->ps) 7424 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7425 // 7426 // map(from: ps->ps->s.i) 7427 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7428 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7429 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7430 // 7431 // map(from: ps->ps->ps) 7432 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7433 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7434 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7435 // 7436 // map(ps->ps->ps->ps) 7437 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7438 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7439 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7440 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7441 // 7442 // map(to: ps->ps->ps->s.f[:22]) 7443 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7444 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7445 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7446 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7447 // 7448 // map(to: s.f[:22]) map(from: s.p[:33]) 7449 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7450 // sizeof(double*) (**), TARGET_PARAM 7451 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7452 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7453 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7454 // (*) allocate contiguous space needed to fit all mapped members even if 7455 // we allocate space for members not mapped (in this example, 7456 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7457 // them as well because they fall between &s.f[0] and &s.p) 7458 // 7459 // map(from: s.f[:22]) map(to: ps->p[:33]) 7460 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7461 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7462 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7463 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7464 // (*) the struct this entry pertains to is the 2nd element in the list of 7465 // arguments, hence MEMBER_OF(2) 7466 // 7467 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7468 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7469 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7470 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7471 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7472 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7473 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7474 // (*) the struct this entry pertains to is the 4th element in the list 7475 // of arguments, hence MEMBER_OF(4) 7476 7477 // Track if the map information being generated is the first for a 
capture. 7478 bool IsCaptureFirstInfo = IsFirstComponentList; 7479 // When the variable is on a declare target link or in a to clause with 7480 // unified memory, a reference is needed to hold the host/device address 7481 // of the variable. 7482 bool RequiresReference = false; 7483 7484 // Scan the components from the base to the complete expression. 7485 auto CI = Components.rbegin(); 7486 auto CE = Components.rend(); 7487 auto I = CI; 7488 7489 // Track if the map information being generated is the first for a list of 7490 // components. 7491 bool IsExpressionFirstInfo = true; 7492 Address BP = Address::invalid(); 7493 const Expr *AssocExpr = I->getAssociatedExpression(); 7494 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7495 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7496 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7497 7498 if (isa<MemberExpr>(AssocExpr)) { 7499 // The base is the 'this' pointer. The content of the pointer is going 7500 // to be the base of the field being mapped. 7501 BP = CGF.LoadCXXThisAddress(); 7502 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7503 (OASE && 7504 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7505 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7506 } else if (OAShE && 7507 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7508 BP = Address( 7509 CGF.EmitScalarExpr(OAShE->getBase()), 7510 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7511 } else { 7512 // The base is the reference to the variable. 7513 // BP = &Var. 
7514 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7515 if (const auto *VD = 7516 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7517 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7518 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7519 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7520 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7521 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7522 RequiresReference = true; 7523 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7524 } 7525 } 7526 } 7527 7528 // If the variable is a pointer and is being dereferenced (i.e. is not 7529 // the last component), the base has to be the pointer itself, not its 7530 // reference. References are ignored for mapping purposes. 7531 QualType Ty = 7532 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7533 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7534 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7535 7536 // We do not need to generate individual map information for the 7537 // pointer, it can be associated with the combined storage. 7538 ++I; 7539 } 7540 } 7541 7542 // Track whether a component of the list should be marked as MEMBER_OF some 7543 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7544 // in a component list should be marked as MEMBER_OF, all subsequent entries 7545 // do not belong to the base struct. E.g. 7546 // struct S2 s; 7547 // s.ps->ps->ps->f[:] 7548 // (1) (2) (3) (4) 7549 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7550 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7551 // is the pointee of ps(2) which is not member of struct s, so it should not 7552 // be marked as such (it is still PTR_AND_OBJ). 7553 // The variable is initialized to false so that PTR_AND_OBJ entries which 7554 // are not struct members are not considered (e.g. 
array of pointers to 7555 // data). 7556 bool ShouldBeMemberOf = false; 7557 7558 // Variable keeping track of whether or not we have encountered a component 7559 // in the component list which is a member expression. Useful when we have a 7560 // pointer or a final array section, in which case it is the previous 7561 // component in the list which tells us whether we have a member expression. 7562 // E.g. X.f[:] 7563 // While processing the final array section "[:]" it is "f" which tells us 7564 // whether we are dealing with a member of a declared struct. 7565 const MemberExpr *EncounteredME = nullptr; 7566 7567 for (; I != CE; ++I) { 7568 // If the current component is member of a struct (parent struct) mark it. 7569 if (!EncounteredME) { 7570 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7571 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7572 // as MEMBER_OF the parent struct. 7573 if (EncounteredME) 7574 ShouldBeMemberOf = true; 7575 } 7576 7577 auto Next = std::next(I); 7578 7579 // We need to generate the addresses and sizes if this is the last 7580 // component, if the component is a pointer or if it is an array section 7581 // whose length can't be proved to be one. If this is a pointer, it 7582 // becomes the base address for the following components. 7583 7584 // A final array section, is one whose length can't be proved to be one. 7585 bool IsFinalArraySection = 7586 isFinalArraySectionExpression(I->getAssociatedExpression()); 7587 7588 // Get information on whether the element is a pointer. Have to do a 7589 // special treatment for array sections given that they are built-in 7590 // types. 
7591 const auto *OASE = 7592 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7593 const auto *OAShE = 7594 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7595 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7596 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7597 bool IsPointer = 7598 OAShE || 7599 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7600 .getCanonicalType() 7601 ->isAnyPointerType()) || 7602 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7603 bool IsNonDerefPointer = IsPointer && !UO && !BO; 7604 7605 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7606 // If this is not the last component, we expect the pointer to be 7607 // associated with an array expression or member expression. 7608 assert((Next == CE || 7609 isa<MemberExpr>(Next->getAssociatedExpression()) || 7610 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7611 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7612 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7613 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7614 "Unexpected expression"); 7615 7616 Address LB = Address::invalid(); 7617 if (OAShE) { 7618 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7619 CGF.getContext().getTypeAlignInChars( 7620 OAShE->getBase()->getType())); 7621 } else { 7622 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7623 .getAddress(CGF); 7624 } 7625 7626 // If this component is a pointer inside the base struct then we don't 7627 // need to create any entry for it - it will be combined with the object 7628 // it is pointing to into a single PTR_AND_OBJ entry. 7629 bool IsMemberPointerOrAddr = 7630 (IsPointer || ForDeviceAddr) && EncounteredME && 7631 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7632 EncounteredME); 7633 if (!OverlappedElements.empty()) { 7634 // Handle base element with the info for overlapped elements. 
7635 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7636 assert(Next == CE && 7637 "Expected last element for the overlapped elements."); 7638 assert(!IsPointer && 7639 "Unexpected base element with the pointer type."); 7640 // Mark the whole struct as the struct that requires allocation on the 7641 // device. 7642 PartialStruct.LowestElem = {0, LB}; 7643 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7644 I->getAssociatedExpression()->getType()); 7645 Address HB = CGF.Builder.CreateConstGEP( 7646 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7647 CGF.VoidPtrTy), 7648 TypeSize.getQuantity() - 1); 7649 PartialStruct.HighestElem = { 7650 std::numeric_limits<decltype( 7651 PartialStruct.HighestElem.first)>::max(), 7652 HB}; 7653 PartialStruct.Base = BP; 7654 // Emit data for non-overlapped data. 7655 OpenMPOffloadMappingFlags Flags = 7656 OMP_MAP_MEMBER_OF | 7657 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7658 /*AddPtrFlag=*/false, 7659 /*AddIsTargetParamFlag=*/false); 7660 LB = BP; 7661 llvm::Value *Size = nullptr; 7662 // Do bitcopy of all non-overlapped structure elements. 
7663 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7664 Component : OverlappedElements) { 7665 Address ComponentLB = Address::invalid(); 7666 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7667 Component) { 7668 if (MC.getAssociatedDeclaration()) { 7669 ComponentLB = 7670 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7671 .getAddress(CGF); 7672 Size = CGF.Builder.CreatePtrDiff( 7673 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7674 CGF.EmitCastToVoidPtr(LB.getPointer())); 7675 break; 7676 } 7677 } 7678 BasePointers.push_back(BP.getPointer()); 7679 Pointers.push_back(LB.getPointer()); 7680 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7681 /*isSigned=*/true)); 7682 Types.push_back(Flags); 7683 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7684 } 7685 BasePointers.push_back(BP.getPointer()); 7686 Pointers.push_back(LB.getPointer()); 7687 Size = CGF.Builder.CreatePtrDiff( 7688 CGF.EmitCastToVoidPtr( 7689 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7690 CGF.EmitCastToVoidPtr(LB.getPointer())); 7691 Sizes.push_back( 7692 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7693 Types.push_back(Flags); 7694 break; 7695 } 7696 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7697 if (!IsMemberPointerOrAddr) { 7698 BasePointers.push_back(BP.getPointer()); 7699 Pointers.push_back(LB.getPointer()); 7700 Sizes.push_back( 7701 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7702 7703 // We need to add a pointer flag for each map that comes from the 7704 // same expression except for the first one. We also need to signal 7705 // this map is the first one that relates with the current capture 7706 // (there is a set of entries for each capture). 
7707 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7708 MapType, MapModifiers, IsImplicit, 7709 !IsExpressionFirstInfo || RequiresReference, 7710 IsCaptureFirstInfo && !RequiresReference); 7711 7712 if (!IsExpressionFirstInfo) { 7713 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7714 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7715 if (IsPointer) 7716 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7717 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7718 7719 if (ShouldBeMemberOf) { 7720 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7721 // should be later updated with the correct value of MEMBER_OF. 7722 Flags |= OMP_MAP_MEMBER_OF; 7723 // From now on, all subsequent PTR_AND_OBJ entries should not be 7724 // marked as MEMBER_OF. 7725 ShouldBeMemberOf = false; 7726 } 7727 } 7728 7729 Types.push_back(Flags); 7730 } 7731 7732 // If we have encountered a member expression so far, keep track of the 7733 // mapped member. If the parent is "*this", then the value declaration 7734 // is nullptr. 7735 if (EncounteredME) { 7736 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7737 unsigned FieldIndex = FD->getFieldIndex(); 7738 7739 // Update info about the lowest and highest elements for this struct 7740 if (!PartialStruct.Base.isValid()) { 7741 PartialStruct.LowestElem = {FieldIndex, LB}; 7742 if (IsFinalArraySection) { 7743 Address HB = 7744 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7745 .getAddress(CGF); 7746 PartialStruct.HighestElem = {FieldIndex, HB}; 7747 } else { 7748 PartialStruct.HighestElem = {FieldIndex, LB}; 7749 } 7750 PartialStruct.Base = BP; 7751 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7752 PartialStruct.LowestElem = {FieldIndex, LB}; 7753 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7754 PartialStruct.HighestElem = {FieldIndex, LB}; 7755 } 7756 } 7757 7758 // If we have a final array section, we are done with this expression. 
7759 if (IsFinalArraySection) 7760 break; 7761 7762 // The pointer becomes the base for the next element. 7763 if (Next != CE) 7764 BP = LB; 7765 7766 IsExpressionFirstInfo = false; 7767 IsCaptureFirstInfo = false; 7768 } 7769 } 7770 } 7771 7772 /// Return the adjusted map modifiers if the declaration a capture refers to 7773 /// appears in a first-private clause. This is expected to be used only with 7774 /// directives that start with 'target'. 7775 MappableExprsHandler::OpenMPOffloadMappingFlags 7776 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7777 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7778 7779 // A first private variable captured by reference will use only the 7780 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7781 // declaration is known as first-private in this handler. 7782 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7783 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7784 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7785 return MappableExprsHandler::OMP_MAP_ALWAYS | 7786 MappableExprsHandler::OMP_MAP_TO; 7787 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7788 return MappableExprsHandler::OMP_MAP_TO | 7789 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7790 return MappableExprsHandler::OMP_MAP_PRIVATE | 7791 MappableExprsHandler::OMP_MAP_TO; 7792 } 7793 return MappableExprsHandler::OMP_MAP_TO | 7794 MappableExprsHandler::OMP_MAP_FROM; 7795 } 7796 7797 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7798 // Rotate by getFlagMemberOffset() bits. 
7799 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7800 << getFlagMemberOffset()); 7801 } 7802 7803 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7804 OpenMPOffloadMappingFlags MemberOfFlag) { 7805 // If the entry is PTR_AND_OBJ but has not been marked with the special 7806 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7807 // marked as MEMBER_OF. 7808 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7809 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7810 return; 7811 7812 // Reset the placeholder value to prepare the flag for the assignment of the 7813 // proper MEMBER_OF value. 7814 Flags &= ~OMP_MAP_MEMBER_OF; 7815 Flags |= MemberOfFlag; 7816 } 7817 7818 void getPlainLayout(const CXXRecordDecl *RD, 7819 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7820 bool AsBase) const { 7821 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7822 7823 llvm::StructType *St = 7824 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7825 7826 unsigned NumElements = St->getNumElements(); 7827 llvm::SmallVector< 7828 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7829 RecordLayout(NumElements); 7830 7831 // Fill bases. 7832 for (const auto &I : RD->bases()) { 7833 if (I.isVirtual()) 7834 continue; 7835 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7836 // Ignore empty bases. 7837 if (Base->isEmpty() || CGF.getContext() 7838 .getASTRecordLayout(Base) 7839 .getNonVirtualSize() 7840 .isZero()) 7841 continue; 7842 7843 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7844 RecordLayout[FieldIndex] = Base; 7845 } 7846 // Fill in virtual bases. 7847 for (const auto &I : RD->vbases()) { 7848 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7849 // Ignore empty bases. 
7850 if (Base->isEmpty()) 7851 continue; 7852 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7853 if (RecordLayout[FieldIndex]) 7854 continue; 7855 RecordLayout[FieldIndex] = Base; 7856 } 7857 // Fill in all the fields. 7858 assert(!RD->isUnion() && "Unexpected union."); 7859 for (const auto *Field : RD->fields()) { 7860 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7861 // will fill in later.) 7862 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7863 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7864 RecordLayout[FieldIndex] = Field; 7865 } 7866 } 7867 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7868 &Data : RecordLayout) { 7869 if (Data.isNull()) 7870 continue; 7871 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7872 getPlainLayout(Base, Layout, /*AsBase=*/true); 7873 else 7874 Layout.push_back(Data.get<const FieldDecl *>()); 7875 } 7876 } 7877 7878 public: 7879 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7880 : CurDir(&Dir), CGF(CGF) { 7881 // Extract firstprivate clause information. 7882 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7883 for (const auto *D : C->varlists()) 7884 FirstPrivateDecls.try_emplace( 7885 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7886 // Extract implicit firstprivates from uses_allocators clauses. 
7887 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7888 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7889 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7890 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 7891 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 7892 /*Implicit=*/true); 7893 else if (const auto *VD = dyn_cast<VarDecl>( 7894 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 7895 ->getDecl())) 7896 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 7897 } 7898 } 7899 // Extract device pointer clause information. 7900 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7901 for (auto L : C->component_lists()) 7902 DevPointersMap[L.first].push_back(L.second); 7903 } 7904 7905 /// Constructor for the declare mapper directive. 7906 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7907 : CurDir(&Dir), CGF(CGF) {} 7908 7909 /// Generate code for the combined entry if we have a partially mapped struct 7910 /// and take care of the mapping flags of the arguments corresponding to 7911 /// individual struct members. 
7912 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7913 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7914 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7915 const StructRangeInfoTy &PartialStruct) const { 7916 // Base is the base of the struct 7917 BasePointers.push_back(PartialStruct.Base.getPointer()); 7918 // Pointer is the address of the lowest element 7919 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7920 Pointers.push_back(LB); 7921 // Size is (addr of {highest+1} element) - (addr of lowest element) 7922 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7923 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7924 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7925 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7926 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7927 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7928 /*isSigned=*/false); 7929 Sizes.push_back(Size); 7930 // Map type is always TARGET_PARAM 7931 Types.push_back(OMP_MAP_TARGET_PARAM); 7932 // Remove TARGET_PARAM flag from the first element 7933 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7934 7935 // All other current entries will be MEMBER_OF the combined entry 7936 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7937 // 0xFFFF in the MEMBER_OF field). 7938 OpenMPOffloadMappingFlags MemberOfFlag = 7939 getMemberOfFlag(BasePointers.size() - 1); 7940 for (auto &M : CurTypes) 7941 setCorrectMemberOfFlag(M, MemberOfFlag); 7942 } 7943 7944 /// Generate all the base pointers, section pointers, sizes and map 7945 /// types for the extracted mappable expressions. Also, for each item that 7946 /// relates with a device pointer, a pair of the relevant declaration and 7947 /// index where it occurs is appended to the device pointers info array. 
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Component lists for 'this'-based member expressions are keyed
    // under the nullptr declaration.
    auto &&InfoGen =
        [&Info](const ValueDecl *D,
                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
                OpenMPMapClauseKind MapType,
                ArrayRef<OpenMPMapModifierKind> MapModifiers,
                bool ReturnDevicePointer, bool IsImplicit,
                bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                                IsImplicit, ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from map/to/from clauses. 'to'/'from' clauses
    // (target update) are encoded with the corresponding map type.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        // Unlike use_device_ptr above, each declaration is handled at most
        // once here, even if it appears in several use_device_addr clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(),
                  /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          // use_device_addr takes the address itself, so only load through
          // the expression when it is not a glvalue.
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays; entries are accumulated here per
      // declaration and appended to the caller's arrays at the end so that
      // emitCombinedEntry can patch just this declaration's entries.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                               OMP_MAP_MEMBER_OF);
          }
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Generate all the base pointers, section pointers, sizes and map types for
  /// the extracted map clauses of user-defined mapper.
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
                         const ValueDecl *D,
                         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
                         OpenMPMapClauseKind MapType,
                         ArrayRef<OpenMPMapModifierKind> MapModifiers,
                         bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // A declare mapper only carries map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// For each by-reference (or pointer) capture of the lambda object \p Arg,
  /// a PTR_AND_OBJ entry is emitted and the pair (field address, lambda
  /// address) is recorded in \p LambdaPointers for the later MEMBER_OF fixup
  /// done by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closures get this special handling.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // By-copy captures are only relevant when the captured value is itself
      // a pointer.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the pointer value itself with size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  /// Rewrites the MEMBER_OF field of every entry that carries the exact flag
  /// combination emitted by generateInfoForLambdaCaptures so that it points
  /// at the entry that maps the enclosing lambda object.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // The flag set below is exactly the one generateInfoForLambdaCaptures
      // produces, so only those entries are touched.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A 'this' capture is keyed under the nullptr declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Each element: (component list, map type, map modifiers, is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two lists overlap when, walking both from the base outwards, one list
    // ends while every compared component matched; the longer list is then
    // recorded as overlapped data of the shorter (base) list.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Only compare against lists after L; each unordered pair is visited
      // exactly once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // The field order of the record is used as a tiebreaker for fields of
    // different parents (e.g. base-class fields vs. derived-class fields).
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different parents: whichever field appears first in
            // the flattened record layout orders first.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        // With unified shared memory, link variables need no explicit
        // mapping; skip everything but MT_Link declarations otherwise.
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped as the pointee of the captured pointer, tofrom.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivates are promoted to a global copy so the
        // original variable need not be transferred each time.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: the mapped pointer is
          // the pointee the reference refers to, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always vary per call site, so they live in
    // stack temporaries filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer/pointer (and, when needed, size) stack arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a device pointer for this declaration can be read back,
      // for use_device_ptr/use_device_addr consumers.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No entries: the runtime accepts null array arguments.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
8819 static const OMPExecutableDirective * 8820 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8821 const auto *CS = D.getInnermostCapturedStmt(); 8822 const auto *Body = 8823 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8824 const Stmt *ChildStmt = 8825 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8826 8827 if (const auto *NestedDir = 8828 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8829 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8830 switch (D.getDirectiveKind()) { 8831 case OMPD_target: 8832 if (isOpenMPDistributeDirective(DKind)) 8833 return NestedDir; 8834 if (DKind == OMPD_teams) { 8835 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8836 /*IgnoreCaptured=*/true); 8837 if (!Body) 8838 return nullptr; 8839 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8840 if (const auto *NND = 8841 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8842 DKind = NND->getDirectiveKind(); 8843 if (isOpenMPDistributeDirective(DKind)) 8844 return NND; 8845 } 8846 } 8847 return nullptr; 8848 case OMPD_target_teams: 8849 if (isOpenMPDistributeDirective(DKind)) 8850 return NestedDir; 8851 return nullptr; 8852 case OMPD_target_parallel: 8853 case OMPD_target_simd: 8854 case OMPD_target_parallel_for: 8855 case OMPD_target_parallel_for_simd: 8856 return nullptr; 8857 case OMPD_target_teams_distribute: 8858 case OMPD_target_teams_distribute_simd: 8859 case OMPD_target_teams_distribute_parallel_for: 8860 case OMPD_target_teams_distribute_parallel_for_simd: 8861 case OMPD_parallel: 8862 case OMPD_for: 8863 case OMPD_parallel_for: 8864 case OMPD_parallel_master: 8865 case OMPD_parallel_sections: 8866 case OMPD_for_simd: 8867 case OMPD_parallel_for_simd: 8868 case OMPD_cancel: 8869 case OMPD_cancellation_point: 8870 case OMPD_ordered: 8871 case OMPD_threadprivate: 8872 case OMPD_allocate: 8873 case OMPD_task: 8874 case OMPD_simd: 8875 case OMPD_sections: 8876 
case OMPD_section: 8877 case OMPD_single: 8878 case OMPD_master: 8879 case OMPD_critical: 8880 case OMPD_taskyield: 8881 case OMPD_barrier: 8882 case OMPD_taskwait: 8883 case OMPD_taskgroup: 8884 case OMPD_atomic: 8885 case OMPD_flush: 8886 case OMPD_depobj: 8887 case OMPD_scan: 8888 case OMPD_teams: 8889 case OMPD_target_data: 8890 case OMPD_target_exit_data: 8891 case OMPD_target_enter_data: 8892 case OMPD_distribute: 8893 case OMPD_distribute_simd: 8894 case OMPD_distribute_parallel_for: 8895 case OMPD_distribute_parallel_for_simd: 8896 case OMPD_teams_distribute: 8897 case OMPD_teams_distribute_simd: 8898 case OMPD_teams_distribute_parallel_for: 8899 case OMPD_teams_distribute_parallel_for_simd: 8900 case OMPD_target_update: 8901 case OMPD_declare_simd: 8902 case OMPD_declare_variant: 8903 case OMPD_begin_declare_variant: 8904 case OMPD_end_declare_variant: 8905 case OMPD_declare_target: 8906 case OMPD_end_declare_target: 8907 case OMPD_declare_reduction: 8908 case OMPD_declare_mapper: 8909 case OMPD_taskloop: 8910 case OMPD_taskloop_simd: 8911 case OMPD_master_taskloop: 8912 case OMPD_master_taskloop_simd: 8913 case OMPD_parallel_master_taskloop: 8914 case OMPD_parallel_master_taskloop_simd: 8915 case OMPD_requires: 8916 case OMPD_unknown: 8917 llvm_unreachable("Unexpected directive."); 8918 } 8919 } 8920 8921 return nullptr; 8922 } 8923 8924 /// Emit the user-defined mapper function. The code generation follows the 8925 /// pattern in the example below. 8926 /// \code 8927 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8928 /// void *base, void *begin, 8929 /// int64_t size, int64_t type) { 8930 /// // Allocate space for an array section first. 8931 /// if (size > 1 && !maptype.IsDelete) 8932 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8933 /// size*sizeof(Ty), clearToFrom(type)); 8934 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each 'declare mapper' declaration is lowered at most once; UDMMap caches
  // the generated function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the mapper (privatized per element below).
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PtrPHI carries the current element pointer: PtrBegin on entry, PtrNext
  // (added at the bottom of the loop) on the back edge.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Four incoming values: the fall-through from FromBB plus the explicit
    // branches from AllocBB, ToBB, and ToElseBB (the tofrom case).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function, and remember it per enclosing function so
  // it can be registered with the runtime later.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Block names are suffixed so the init and delete expansions are
  // distinguishable in the emitted IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // A single element (Size <= 1) needs no separate alloc/delete component.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Init path runs only when DELETE is NOT set; delete path only when it is.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Emit a call to __kmpc_push_target_tripcount with the trip count of the
/// nested (teams) distribute loop of \p D, if one exists, so the runtime can
/// choose grid sizes before launching the target region.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // SizeEmitter may return null when the trip count cannot be computed; in
    // that case no runtime call is emitted.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
9294 SizeEmitter) { 9295 if (!CGF.HaveInsertPoint()) 9296 return; 9297 9298 assert(OutlinedFn && "Invalid outlined function!"); 9299 9300 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9301 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9302 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9303 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9304 PrePostActionTy &) { 9305 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9306 }; 9307 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9308 9309 CodeGenFunction::OMPTargetDataInfo InputInfo; 9310 llvm::Value *MapTypesArray = nullptr; 9311 // Fill up the pointer arrays and transfer execution to the device. 9312 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9313 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9314 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9315 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9316 // Reverse offloading is not supported, so just execute on the host. 9317 if (RequiresOuterTask) { 9318 CapturedVars.clear(); 9319 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9320 } 9321 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9322 return; 9323 } 9324 9325 // On top of the arrays that were filled up, the target offloading call 9326 // takes as arguments the device id as well as the host pointer. The host 9327 // pointer is used by the runtime library to identify the current target 9328 // region, so it only has to be unique and not necessarily point to 9329 // anything. It could be the pointer to the outlined function that 9330 // implements the target region, but we aren't using that so that the 9331 // compiler doesn't need to keep that, and could therefore inline the host 9332 // function if proven worthwhile during optimization. 9333 9334 // From this point on, we need to have an ID of the target region defined. 
9335 assert(OutlinedFnID && "Invalid outlined function ID!"); 9336 9337 // Emit device ID if any. 9338 llvm::Value *DeviceID; 9339 if (Device.getPointer()) { 9340 assert((Device.getInt() == OMPC_DEVICE_unknown || 9341 Device.getInt() == OMPC_DEVICE_device_num) && 9342 "Expected device_num modifier."); 9343 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9344 DeviceID = 9345 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9346 } else { 9347 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9348 } 9349 9350 // Emit the number of elements in the offloading arrays. 9351 llvm::Value *PointerNum = 9352 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9353 9354 // Return value of the runtime offloading call. 9355 llvm::Value *Return; 9356 9357 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9358 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9359 9360 // Emit tripcount for the target loop-based directive. 9361 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9362 9363 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9364 // The target region is an outlined function launched by the runtime 9365 // via calls __tgt_target() or __tgt_target_teams(). 9366 // 9367 // __tgt_target() launches a target region with one team and one thread, 9368 // executing a serial region. This master thread may in turn launch 9369 // more threads within its team upon encountering a parallel region, 9370 // however, no additional teams can be launched on the device. 9371 // 9372 // __tgt_target_teams() launches a target region with one or more teams, 9373 // each with one or more threads. This call is required for target 9374 // constructs such as: 9375 // 'target teams' 9376 // 'target' / 'teams' 9377 // 'target teams distribute parallel for' 9378 // 'target parallel' 9379 // and so on. 
9380 // 9381 // Note that on the host and CPU targets, the runtime implementation of 9382 // these calls simply call the outlined function without forking threads. 9383 // The outlined functions themselves have runtime calls to 9384 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9385 // the compiler in emitTeamsCall() and emitParallelCall(). 9386 // 9387 // In contrast, on the NVPTX target, the implementation of 9388 // __tgt_target_teams() launches a GPU kernel with the requested number 9389 // of teams and threads so no additional calls to the runtime are required. 9390 if (NumTeams) { 9391 // If we have NumTeams defined this means that we have an enclosed teams 9392 // region. Therefore we also expect to have NumThreads defined. These two 9393 // values should be defined in the presence of a teams directive, 9394 // regardless of having any clauses associated. If the user is using teams 9395 // but no clauses, these two values will be the default that should be 9396 // passed to the runtime library - a 32-bit integer with the value zero. 9397 assert(NumThreads && "Thread limit expression should be available along " 9398 "with number of teams."); 9399 llvm::Value *OffloadingArgs[] = {DeviceID, 9400 OutlinedFnID, 9401 PointerNum, 9402 InputInfo.BasePointersArray.getPointer(), 9403 InputInfo.PointersArray.getPointer(), 9404 InputInfo.SizesArray.getPointer(), 9405 MapTypesArray, 9406 NumTeams, 9407 NumThreads}; 9408 Return = CGF.EmitRuntimeCall( 9409 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9410 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_teams_nowait 9411 : OMPRTL___tgt_target_teams), 9412 OffloadingArgs); 9413 } else { 9414 llvm::Value *OffloadingArgs[] = {DeviceID, 9415 OutlinedFnID, 9416 PointerNum, 9417 InputInfo.BasePointersArray.getPointer(), 9418 InputInfo.PointersArray.getPointer(), 9419 InputInfo.SizesArray.getPointer(), 9420 MapTypesArray}; 9421 Return = CGF.EmitRuntimeCall( 9422 llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( 9423 CGM.getModule(), 9424 HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target), 9425 OffloadingArgs); 9426 } 9427 9428 // Check the error code and execute the host version if required. 9429 llvm::BasicBlock *OffloadFailedBlock = 9430 CGF.createBasicBlock("omp_offload.failed"); 9431 llvm::BasicBlock *OffloadContBlock = 9432 CGF.createBasicBlock("omp_offload.cont"); 9433 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9434 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9435 9436 CGF.EmitBlock(OffloadFailedBlock); 9437 if (RequiresOuterTask) { 9438 CapturedVars.clear(); 9439 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9440 } 9441 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9442 CGF.EmitBranch(OffloadContBlock); 9443 9444 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9445 }; 9446 9447 // Notify that the host version must be executed. 9448 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9449 RequiresOuterTask](CodeGenFunction &CGF, 9450 PrePostActionTy &) { 9451 if (RequiresOuterTask) { 9452 CapturedVars.clear(); 9453 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9454 } 9455 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9456 }; 9457 9458 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9459 &CapturedVars, RequiresOuterTask, 9460 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9461 // Fill up the arrays with all the captured variables. 
    // Accumulators for the per-capture mapping info of the whole directive.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    // Tracks pointers introduced for lambda captures so their MEMBER_OF flags
    // can be adjusted after all captures are processed.
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures in lockstep with the captured-record fields and the
    // already-generated captured values.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      // Per-capture scratch arrays; appended to the accumulators below.
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
9538 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9539 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9540 Info.PointersArray, Info.SizesArray, 9541 Info.MapTypesArray, Info); 9542 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9543 InputInfo.BasePointersArray = 9544 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9545 InputInfo.PointersArray = 9546 Address(Info.PointersArray, CGM.getPointerAlign()); 9547 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9548 MapTypesArray = Info.MapTypesArray; 9549 if (RequiresOuterTask) 9550 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9551 else 9552 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9553 }; 9554 9555 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9556 CodeGenFunction &CGF, PrePostActionTy &) { 9557 if (RequiresOuterTask) { 9558 CodeGenFunction::OMPTargetDataInfo InputInfo; 9559 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9560 } else { 9561 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9562 } 9563 }; 9564 9565 // If we have a target function ID it means that we need to support 9566 // offloading, otherwise, just execute on the host. We need to execute on host 9567 // regardless of the conditional in the if clause if, e.g., the user do not 9568 // specify target triples. 9569 if (OutlinedFnID) { 9570 if (IfCond) { 9571 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9572 } else { 9573 RegionCodeGenTy ThenRCG(TargetThenGen); 9574 ThenRCG(CGF); 9575 } 9576 } else { 9577 RegionCodeGenTy ElseRCG(TargetElseGen); 9578 ElseRCG(CGF); 9579 } 9580 } 9581 9582 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9583 StringRef ParentName) { 9584 if (!S) 9585 return; 9586 9587 // Codegen OMP target directives that offload compute to the device. 
  // A statement requires device codegen iff it is an executable OpenMP
  // directive of a target-execution kind (target, target parallel, ...).
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Unique identification of the target region: device/file/line of the
    // directive's begin location, combined with ParentName.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific target
    // directive kind. All non-target kinds below are unreachable because
    // RequiresDeviceCodegen already filtered for target-execution directives.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, recurse into its associated statement
  // (if any) to find nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  // Device compilation from here on.
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Host compilation: emit the variable normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  // Link-clause variables (and to-clause variables under unified shared
  // memory) are deferred; see emitDeferredTargetDecls().
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name from the variable's location so copies from
    // different translation units do not collide.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the copy alive through optimization and register it as a 'to' entry
  // in the offload entries table.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register without target triples on the host side.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size zero signals "no definition here".
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal constant "<name>_ref" global whose initializer is the
      // variable's address, and mark it compiler-used, so the variable itself
      // cannot be optimized away on the device.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' clause, or 'to' clause under requires unified_shared_memory:
    // register the pointer-sized link/indirection entry instead of the
    // variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Route functions (and declare-reduction decls) and variables to the
  // matching target-emission handler.
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emit the declare-target variables whose emission was deferred in
  // emitTargetGlobalVariable().
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Only the link/indirection pointer is needed, not the variable body.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Default implementation is a no-op; device-specific runtimes override it.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  // Record the effects of a '#pragma omp requires' directive: unified shared
  // memory, and the default atomic memory ordering.
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  // Returns true (and sets AS) if VD carries an 'omp allocate' attribute with
  // a predefined allocator; user-defined allocators are not expected here.
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // On the device, temporarily disable the auto "mark as global" behavior;
  // the destructor restores the saved flag.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
10010 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10011 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10012 if (auto *F = dyn_cast_or_null<llvm::Function>( 10013 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10014 return !F->isDeclaration(); 10015 return false; 10016 } 10017 return true; 10018 } 10019 10020 return !AlreadyEmittedTargetDecls.insert(D).second; 10021 } 10022 10023 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10024 // If we don't have entries or if we are emitting code for the device, we 10025 // don't need to do anything. 10026 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10027 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10028 (OffloadEntriesInfoManager.empty() && 10029 !HasEmittedDeclareTargetRegion && 10030 !HasEmittedTargetRegion)) 10031 return nullptr; 10032 10033 // Create and register the function that handles the requires directives. 10034 ASTContext &C = CGM.getContext(); 10035 10036 llvm::Function *RequiresRegFn; 10037 { 10038 CodeGenFunction CGF(CGM); 10039 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10040 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10041 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10042 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10043 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10044 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10045 // TODO: check for other requires clauses. 10046 // The requires directive takes effect only when a target region is 10047 // present in the compilation unit. Otherwise it is ignored and not 10048 // passed to the runtime. This avoids the runtime from throwing an error 10049 // for mismatching requires clauses across compilation units that don't 10050 // contain at least 1 target region. 
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the __tgt_register_requires(flags) call inside the generated
    // registration function.
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A missing clause expression is encoded as a 32-bit zero, which the
  // runtime treats as "use the default".
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Open the data environment: __tgt_target_data_begin(...).
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Close the data environment: __tgt_target_data_end(...).
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. Only enter-data/exit-data/update are valid here (see the
    // assertion above); every other kind is unreachable.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), RTLFn),
                        OffloadingArgs);
  };

  // Wrapper that computes the mapping information from the map clauses before
  // delegating to ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
10387 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10388 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10389 Info.PointersArray, Info.SizesArray, 10390 Info.MapTypesArray, Info); 10391 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10392 InputInfo.BasePointersArray = 10393 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10394 InputInfo.PointersArray = 10395 Address(Info.PointersArray, CGM.getPointerAlign()); 10396 InputInfo.SizesArray = 10397 Address(Info.SizesArray, CGM.getPointerAlign()); 10398 MapTypesArray = Info.MapTypesArray; 10399 if (D.hasClausesOfKind<OMPDependClause>()) 10400 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10401 else 10402 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10403 }; 10404 10405 if (IfCond) { 10406 emitIfClause(CGF, IfCond, TargetThenGen, 10407 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10408 } else { 10409 RegionCodeGenTy ThenRCG(TargetThenGen); 10410 ThenRCG(CGF); 10411 } 10412 } 10413 10414 namespace { 10415 /// Kind of parameter in a function with 'declare simd' directive. 10416 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10417 /// Attribute set of the parameter. 10418 struct ParamAttrTy { 10419 ParamKindTy Kind = Vector; 10420 llvm::APSInt StrideOrArg; 10421 llvm::APSInt Alignment; 10422 }; 10423 } // namespace 10424 10425 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10426 ArrayRef<ParamAttrTy> ParamAttrs) { 10427 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10428 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10429 // of that clause. The VLEN value must be power of 2. 10430 // In other case the notion of the function`s "characteristic data type" (CDT) 10431 // is used to compute the vector length. 10432 // CDT is defined in the following order: 10433 // a) For non-void function, the CDT is the return type. 
10434 // b) If the function has any non-uniform, non-linear parameters, then the 10435 // CDT is the type of the first such parameter. 10436 // c) If the CDT determined by a) or b) above is struct, union, or class 10437 // type which is pass-by-value (except for the type that maps to the 10438 // built-in complex data type), the characteristic data type is int. 10439 // d) If none of the above three cases is applicable, the CDT is int. 10440 // The VLEN is then determined based on the CDT and the size of vector 10441 // register of that ISA for which current vector version is generated. The 10442 // VLEN is computed using the formula below: 10443 // VLEN = sizeof(vector_register) / sizeof(CDT), 10444 // where vector register size specified in section 3.2.1 Registers and the 10445 // Stack Frame of original AMD64 ABI document. 10446 QualType RetType = FD->getReturnType(); 10447 if (RetType.isNull()) 10448 return 0; 10449 ASTContext &C = FD->getASTContext(); 10450 QualType CDT; 10451 if (!RetType.isNull() && !RetType->isVoidType()) { 10452 CDT = RetType; 10453 } else { 10454 unsigned Offset = 0; 10455 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10456 if (ParamAttrs[Offset].Kind == Vector) 10457 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10458 ++Offset; 10459 } 10460 if (CDT.isNull()) { 10461 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10462 if (ParamAttrs[I + Offset].Kind == Vector) { 10463 CDT = FD->getParamDecl(I)->getType(); 10464 break; 10465 } 10466 } 10467 } 10468 } 10469 if (CDT.isNull()) 10470 CDT = C.IntTy; 10471 CDT = CDT->getCanonicalTypeUnqualified(); 10472 if (CDT->isRecordType() || CDT->isUnionType()) 10473 CDT = C.IntTy; 10474 return C.getTypeSize(CDT); 10475 } 10476 10477 static void 10478 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10479 const llvm::APSInt &VLENVal, 10480 ArrayRef<ParamAttrTy> ParamAttrs, 10481 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10482 struct ISADataTy { 
10483 char ISA; 10484 unsigned VecRegSize; 10485 }; 10486 ISADataTy ISAData[] = { 10487 { 10488 'b', 128 10489 }, // SSE 10490 { 10491 'c', 256 10492 }, // AVX 10493 { 10494 'd', 256 10495 }, // AVX2 10496 { 10497 'e', 512 10498 }, // AVX512 10499 }; 10500 llvm::SmallVector<char, 2> Masked; 10501 switch (State) { 10502 case OMPDeclareSimdDeclAttr::BS_Undefined: 10503 Masked.push_back('N'); 10504 Masked.push_back('M'); 10505 break; 10506 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10507 Masked.push_back('N'); 10508 break; 10509 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10510 Masked.push_back('M'); 10511 break; 10512 } 10513 for (char Mask : Masked) { 10514 for (const ISADataTy &Data : ISAData) { 10515 SmallString<256> Buffer; 10516 llvm::raw_svector_ostream Out(Buffer); 10517 Out << "_ZGV" << Data.ISA << Mask; 10518 if (!VLENVal) { 10519 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10520 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10521 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10522 } else { 10523 Out << VLENVal; 10524 } 10525 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10526 switch (ParamAttr.Kind){ 10527 case LinearWithVarStride: 10528 Out << 's' << ParamAttr.StrideOrArg; 10529 break; 10530 case Linear: 10531 Out << 'l'; 10532 if (ParamAttr.StrideOrArg != 1) 10533 Out << ParamAttr.StrideOrArg; 10534 break; 10535 case Uniform: 10536 Out << 'u'; 10537 break; 10538 case Vector: 10539 Out << 'v'; 10540 break; 10541 } 10542 if (!!ParamAttr.Alignment) 10543 Out << 'a' << ParamAttr.Alignment; 10544 } 10545 Out << '_' << Fn->getName(); 10546 Fn->addFnAttr(Out.str()); 10547 } 10548 } 10549 } 10550 10551 // This are the Functions that are needed to mangle the name of the 10552 // vector functions generated by the compiler, according to the rules 10553 // defined in the "Vector Function ABI specifications for AArch64", 10554 // available at 10555 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10556 10557 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10558 /// 10559 /// TODO: Need to implement the behavior for reference marked with a 10560 /// var or no linear modifiers (1.b in the section). For this, we 10561 /// need to extend ParamKindTy to support the linear modifiers. 10562 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10563 QT = QT.getCanonicalType(); 10564 10565 if (QT->isVoidType()) 10566 return false; 10567 10568 if (Kind == ParamKindTy::Uniform) 10569 return false; 10570 10571 if (Kind == ParamKindTy::Linear) 10572 return false; 10573 10574 // TODO: Handle linear references with modifiers 10575 10576 if (Kind == ParamKindTy::LinearWithVarStride) 10577 return false; 10578 10579 return true; 10580 } 10581 10582 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10583 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10584 QT = QT.getCanonicalType(); 10585 unsigned Size = C.getTypeSize(QT); 10586 10587 // Only scalars and complex within 16 bytes wide set PVB to true. 10588 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10589 return false; 10590 10591 if (QT->isFloatingType()) 10592 return true; 10593 10594 if (QT->isIntegerType()) 10595 return true; 10596 10597 if (QT->isPointerType()) 10598 return true; 10599 10600 // TODO: Add support for complex types (section 3.1.2, item 2). 10601 10602 return false; 10603 } 10604 10605 /// Computes the lane size (LS) of a return type or of an input parameter, 10606 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10607 /// TODO: Add support for references, section 3.2.1, item 1. 
10608 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10609 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10610 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10611 if (getAArch64PBV(PTy, C)) 10612 return C.getTypeSize(PTy); 10613 } 10614 if (getAArch64PBV(QT, C)) 10615 return C.getTypeSize(QT); 10616 10617 return C.getTypeSize(C.getUIntPtrType()); 10618 } 10619 10620 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10621 // signature of the scalar function, as defined in 3.2.2 of the 10622 // AAVFABI. 10623 static std::tuple<unsigned, unsigned, bool> 10624 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10625 QualType RetType = FD->getReturnType().getCanonicalType(); 10626 10627 ASTContext &C = FD->getASTContext(); 10628 10629 bool OutputBecomesInput = false; 10630 10631 llvm::SmallVector<unsigned, 8> Sizes; 10632 if (!RetType->isVoidType()) { 10633 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10634 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10635 OutputBecomesInput = true; 10636 } 10637 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10638 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10639 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10640 } 10641 10642 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10643 // The LS of a function parameter / return value can only be a power 10644 // of 2, starting from 8 bits, up to 128. 
10645 assert(std::all_of(Sizes.begin(), Sizes.end(), 10646 [](unsigned Size) { 10647 return Size == 8 || Size == 16 || Size == 32 || 10648 Size == 64 || Size == 128; 10649 }) && 10650 "Invalid size"); 10651 10652 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10653 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10654 OutputBecomesInput); 10655 } 10656 10657 /// Mangle the parameter part of the vector function name according to 10658 /// their OpenMP classification. The mangling function is defined in 10659 /// section 3.5 of the AAVFABI. 10660 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10661 SmallString<256> Buffer; 10662 llvm::raw_svector_ostream Out(Buffer); 10663 for (const auto &ParamAttr : ParamAttrs) { 10664 switch (ParamAttr.Kind) { 10665 case LinearWithVarStride: 10666 Out << "ls" << ParamAttr.StrideOrArg; 10667 break; 10668 case Linear: 10669 Out << 'l'; 10670 // Don't print the step value if it is not present or if it is 10671 // equal to 1. 10672 if (ParamAttr.StrideOrArg != 1) 10673 Out << ParamAttr.StrideOrArg; 10674 break; 10675 case Uniform: 10676 Out << 'u'; 10677 break; 10678 case Vector: 10679 Out << 'v'; 10680 break; 10681 } 10682 10683 if (!!ParamAttr.Alignment) 10684 Out << 'a' << ParamAttr.Alignment; 10685 } 10686 10687 return std::string(Out.str()); 10688 } 10689 10690 // Function used to add the attribute. The parameter `VLEN` is 10691 // templated to allow the use of "x" when targeting scalable functions 10692 // for SVE. 
10693 template <typename T> 10694 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10695 char ISA, StringRef ParSeq, 10696 StringRef MangledName, bool OutputBecomesInput, 10697 llvm::Function *Fn) { 10698 SmallString<256> Buffer; 10699 llvm::raw_svector_ostream Out(Buffer); 10700 Out << Prefix << ISA << LMask << VLEN; 10701 if (OutputBecomesInput) 10702 Out << "v"; 10703 Out << ParSeq << "_" << MangledName; 10704 Fn->addFnAttr(Out.str()); 10705 } 10706 10707 // Helper function to generate the Advanced SIMD names depending on 10708 // the value of the NDS when simdlen is not present. 10709 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10710 StringRef Prefix, char ISA, 10711 StringRef ParSeq, StringRef MangledName, 10712 bool OutputBecomesInput, 10713 llvm::Function *Fn) { 10714 switch (NDS) { 10715 case 8: 10716 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10717 OutputBecomesInput, Fn); 10718 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10719 OutputBecomesInput, Fn); 10720 break; 10721 case 16: 10722 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10723 OutputBecomesInput, Fn); 10724 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10725 OutputBecomesInput, Fn); 10726 break; 10727 case 32: 10728 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10729 OutputBecomesInput, Fn); 10730 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10731 OutputBecomesInput, Fn); 10732 break; 10733 case 64: 10734 case 128: 10735 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10736 OutputBecomesInput, Fn); 10737 break; 10738 default: 10739 llvm_unreachable("Scalar type is too wide."); 10740 } 10741 } 10742 10743 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10744 static void emitAArch64DeclareSimdFunction( 10745 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10746 ArrayRef<ParamAttrTy> ParamAttrs, 10747 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10748 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10749 10750 // Get basic data for building the vector signature. 10751 const auto Data = getNDSWDS(FD, ParamAttrs); 10752 const unsigned NDS = std::get<0>(Data); 10753 const unsigned WDS = std::get<1>(Data); 10754 const bool OutputBecomesInput = std::get<2>(Data); 10755 10756 // Check the values provided via `simdlen` by the user. 10757 // 1. A `simdlen(1)` doesn't produce vector signatures, 10758 if (UserVLEN == 1) { 10759 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10760 DiagnosticsEngine::Warning, 10761 "The clause simdlen(1) has no effect when targeting aarch64."); 10762 CGM.getDiags().Report(SLoc, DiagID); 10763 return; 10764 } 10765 10766 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10767 // Advanced SIMD output. 10768 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10769 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10770 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10771 "power of 2 when targeting Advanced SIMD."); 10772 CGM.getDiags().Report(SLoc, DiagID); 10773 return; 10774 } 10775 10776 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10777 // limits. 10778 if (ISA == 's' && UserVLEN != 0) { 10779 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10780 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10781 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10782 "lanes in the architectural constraints " 10783 "for SVE (min is 128-bit, max is " 10784 "2048-bit, by steps of 128-bit)"); 10785 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10786 return; 10787 } 10788 } 10789 10790 // Sort out parameter sequence. 
10791 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10792 StringRef Prefix = "_ZGV"; 10793 // Generate simdlen from user input (if any). 10794 if (UserVLEN) { 10795 if (ISA == 's') { 10796 // SVE generates only a masked function. 10797 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10798 OutputBecomesInput, Fn); 10799 } else { 10800 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10801 // Advanced SIMD generates one or two functions, depending on 10802 // the `[not]inbranch` clause. 10803 switch (State) { 10804 case OMPDeclareSimdDeclAttr::BS_Undefined: 10805 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10806 OutputBecomesInput, Fn); 10807 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10808 OutputBecomesInput, Fn); 10809 break; 10810 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10811 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10812 OutputBecomesInput, Fn); 10813 break; 10814 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10815 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10816 OutputBecomesInput, Fn); 10817 break; 10818 } 10819 } 10820 } else { 10821 // If no user simdlen is provided, follow the AAVFABI rules for 10822 // generating the vector length. 10823 if (ISA == 's') { 10824 // SVE, section 3.4.1, item 1. 10825 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10826 OutputBecomesInput, Fn); 10827 } else { 10828 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10829 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10830 // two vector names depending on the use of the clause 10831 // `[not]inbranch`. 
10832 switch (State) { 10833 case OMPDeclareSimdDeclAttr::BS_Undefined: 10834 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10835 OutputBecomesInput, Fn); 10836 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10837 OutputBecomesInput, Fn); 10838 break; 10839 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10840 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10841 OutputBecomesInput, Fn); 10842 break; 10843 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10844 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10845 OutputBecomesInput, Fn); 10846 break; 10847 } 10848 } 10849 } 10850 } 10851 10852 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10853 llvm::Function *Fn) { 10854 ASTContext &C = CGM.getContext(); 10855 FD = FD->getMostRecentDecl(); 10856 // Map params to their positions in function decl. 10857 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10858 if (isa<CXXMethodDecl>(FD)) 10859 ParamPositions.try_emplace(FD, 0); 10860 unsigned ParamPos = ParamPositions.size(); 10861 for (const ParmVarDecl *P : FD->parameters()) { 10862 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10863 ++ParamPos; 10864 } 10865 while (FD) { 10866 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10867 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10868 // Mark uniform parameters. 10869 for (const Expr *E : Attr->uniforms()) { 10870 E = E->IgnoreParenImpCasts(); 10871 unsigned Pos; 10872 if (isa<CXXThisExpr>(E)) { 10873 Pos = ParamPositions[FD]; 10874 } else { 10875 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10876 ->getCanonicalDecl(); 10877 Pos = ParamPositions[PVD]; 10878 } 10879 ParamAttrs[Pos].Kind = Uniform; 10880 } 10881 // Get alignment info. 
10882 auto NI = Attr->alignments_begin(); 10883 for (const Expr *E : Attr->aligneds()) { 10884 E = E->IgnoreParenImpCasts(); 10885 unsigned Pos; 10886 QualType ParmTy; 10887 if (isa<CXXThisExpr>(E)) { 10888 Pos = ParamPositions[FD]; 10889 ParmTy = E->getType(); 10890 } else { 10891 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10892 ->getCanonicalDecl(); 10893 Pos = ParamPositions[PVD]; 10894 ParmTy = PVD->getType(); 10895 } 10896 ParamAttrs[Pos].Alignment = 10897 (*NI) 10898 ? (*NI)->EvaluateKnownConstInt(C) 10899 : llvm::APSInt::getUnsigned( 10900 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10901 .getQuantity()); 10902 ++NI; 10903 } 10904 // Mark linear parameters. 10905 auto SI = Attr->steps_begin(); 10906 auto MI = Attr->modifiers_begin(); 10907 for (const Expr *E : Attr->linears()) { 10908 E = E->IgnoreParenImpCasts(); 10909 unsigned Pos; 10910 // Rescaling factor needed to compute the linear parameter 10911 // value in the mangled name. 10912 unsigned PtrRescalingFactor = 1; 10913 if (isa<CXXThisExpr>(E)) { 10914 Pos = ParamPositions[FD]; 10915 } else { 10916 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10917 ->getCanonicalDecl(); 10918 Pos = ParamPositions[PVD]; 10919 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10920 PtrRescalingFactor = CGM.getContext() 10921 .getTypeSizeInChars(P->getPointeeType()) 10922 .getQuantity(); 10923 } 10924 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10925 ParamAttr.Kind = Linear; 10926 // Assuming a stride of 1, for `linear` without modifiers. 
10927 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10928 if (*SI) { 10929 Expr::EvalResult Result; 10930 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10931 if (const auto *DRE = 10932 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10933 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10934 ParamAttr.Kind = LinearWithVarStride; 10935 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10936 ParamPositions[StridePVD->getCanonicalDecl()]); 10937 } 10938 } 10939 } else { 10940 ParamAttr.StrideOrArg = Result.Val.getInt(); 10941 } 10942 } 10943 // If we are using a linear clause on a pointer, we need to 10944 // rescale the value of linear_step with the byte size of the 10945 // pointee type. 10946 if (Linear == ParamAttr.Kind) 10947 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 10948 ++SI; 10949 ++MI; 10950 } 10951 llvm::APSInt VLENVal; 10952 SourceLocation ExprLoc; 10953 const Expr *VLENExpr = Attr->getSimdlen(); 10954 if (VLENExpr) { 10955 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10956 ExprLoc = VLENExpr->getExprLoc(); 10957 } 10958 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10959 if (CGM.getTriple().isX86()) { 10960 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10961 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10962 unsigned VLEN = VLENVal.getExtValue(); 10963 StringRef MangledName = Fn->getName(); 10964 if (CGM.getTarget().hasFeature("sve")) 10965 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10966 MangledName, 's', 128, Fn, ExprLoc); 10967 if (CGM.getTarget().hasFeature("neon")) 10968 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10969 MangledName, 'n', 128, Fn, ExprLoc); 10970 } 10971 } 10972 FD = FD->getPreviousDecl(); 10973 } 10974 } 10975 10976 namespace { 10977 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments passed to the __kmpc_doacross_fini call.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \param RTLFn runtime finalization entry (__kmpc_doacross_fini).
  /// \param CallArgs exactly DoacrossFinArgs values (loc, thread id),
  /// copied so they stay valid until the cleanup fires.
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emits the stored runtime call when the cleanup scope is exited.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the __kmpc_doacross_init call for a loop with cross-iteration
/// dependences, filling one kmp_dim descriptor per loop dimension, and
/// pushes a cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim per loop dimension, zero-initialized (so 'lo' stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini as a normal-and-EH cleanup so
  // it runs on every exit path out of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emits the runtime call for an 'ordered depend(source)' /
/// 'ordered depend(sink)' construct: loop counters from the depend clause are
/// stored into a temporary kmp_int64 array and passed to
/// __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Convert each loop counter to kmp_int64 and store it in the array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee at \p Loc with an artificial debug location;
/// uses the nounwind call form when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Default implementation: calling an outlined function is a plain emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

// Records that a 'declare target' function body has been emitted.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

// Host-side default: the native parameter's local address is used directly;
// device runtimes override this to map native <-> target parameters.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments passed to the __kmpc_free call.
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  /// \param RTLFn runtime deallocation entry (__kmpc_free).
  /// \param CallArgs exactly CleanupArgs values (thread id, address,
  /// allocator), copied so they stay valid until the cleanup fires.
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emits the stored runtime call when the cleanup scope is exited.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// For a variable marked with an OpenMP 'allocate' directive, allocates its
/// storage through __kmpc_alloc with the requested allocator and registers a
/// __kmpc_free cleanup; returns Address::invalid() when the default
/// allocation path should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is a runtime value; round it up to the alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Pair the allocation with a __kmpc_free on every exit path.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(),
                                                        OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11242 "Expected member of current class."); 11243 VD = ME->getMemberDecl(); 11244 } 11245 DS.insert(VD); 11246 } 11247 } 11248 } 11249 11250 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11251 if (!NeedToPush) 11252 return; 11253 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11254 } 11255 11256 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11257 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11258 11259 return llvm::any_of( 11260 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11261 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11262 } 11263 11264 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11265 const OMPExecutableDirective &S, 11266 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11267 const { 11268 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11269 // Vars in target/task regions must be excluded completely. 11270 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11271 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11272 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11273 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11274 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11275 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11276 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11277 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11278 } 11279 } 11280 // Exclude vars in private clauses. 
11281 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11282 for (const Expr *Ref : C->varlists()) { 11283 if (!Ref->getType()->isScalarType()) 11284 continue; 11285 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11286 if (!DRE) 11287 continue; 11288 NeedToCheckForLPCs.insert(DRE->getDecl()); 11289 } 11290 } 11291 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11292 for (const Expr *Ref : C->varlists()) { 11293 if (!Ref->getType()->isScalarType()) 11294 continue; 11295 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11296 if (!DRE) 11297 continue; 11298 NeedToCheckForLPCs.insert(DRE->getDecl()); 11299 } 11300 } 11301 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11302 for (const Expr *Ref : C->varlists()) { 11303 if (!Ref->getType()->isScalarType()) 11304 continue; 11305 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11306 if (!DRE) 11307 continue; 11308 NeedToCheckForLPCs.insert(DRE->getDecl()); 11309 } 11310 } 11311 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11312 for (const Expr *Ref : C->varlists()) { 11313 if (!Ref->getType()->isScalarType()) 11314 continue; 11315 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11316 if (!DRE) 11317 continue; 11318 NeedToCheckForLPCs.insert(DRE->getDecl()); 11319 } 11320 } 11321 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11322 for (const Expr *Ref : C->varlists()) { 11323 if (!Ref->getType()->isScalarType()) 11324 continue; 11325 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11326 if (!DRE) 11327 continue; 11328 NeedToCheckForLPCs.insert(DRE->getDecl()); 11329 } 11330 } 11331 for (const Decl *VD : NeedToCheckForLPCs) { 11332 for (const LastprivateConditionalData &Data : 11333 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11334 if (Data.DeclToUniqueName.count(VD) > 0) { 11335 if (!Data.Disabled) 11336 
NeedToAddForLPCsAsDisabled.insert(VD); 11337 break; 11338 } 11339 } 11340 } 11341 } 11342 11343 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11344 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11345 : CGM(CGF.CGM), 11346 Action((CGM.getLangOpts().OpenMP >= 50 && 11347 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11348 [](const OMPLastprivateClause *C) { 11349 return C->getKind() == 11350 OMPC_LASTPRIVATE_conditional; 11351 })) 11352 ? ActionToDo::PushAsLastprivateConditional 11353 : ActionToDo::DoNotPush) { 11354 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11355 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11356 return; 11357 assert(Action == ActionToDo::PushAsLastprivateConditional && 11358 "Expected a push action."); 11359 LastprivateConditionalData &Data = 11360 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11361 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11362 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11363 continue; 11364 11365 for (const Expr *Ref : C->varlists()) { 11366 Data.DeclToUniqueName.insert(std::make_pair( 11367 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11368 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11369 } 11370 } 11371 Data.IVLVal = IVLVal; 11372 Data.Fn = CGF.CurFn; 11373 } 11374 11375 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11376 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11377 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11378 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11379 if (CGM.getLangOpts().OpenMP < 50) 11380 return; 11381 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11382 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11383 if (!NeedToAddForLPCsAsDisabled.empty()) { 11384 Action = ActionToDo::DisableLastprivateConditional; 11385 
LastprivateConditionalData &Data = 11386 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11387 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11388 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11389 Data.Fn = CGF.CurFn; 11390 Data.Disabled = true; 11391 } 11392 } 11393 11394 CGOpenMPRuntime::LastprivateConditionalRAII 11395 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11396 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11397 return LastprivateConditionalRAII(CGF, S); 11398 } 11399 11400 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11401 if (CGM.getLangOpts().OpenMP < 50) 11402 return; 11403 if (Action == ActionToDo::DisableLastprivateConditional) { 11404 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11405 "Expected list of disabled private vars."); 11406 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11407 } 11408 if (Action == ActionToDo::PushAsLastprivateConditional) { 11409 assert( 11410 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11411 "Expected list of lastprivate conditional vars."); 11412 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11413 } 11414 } 11415 11416 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11417 const VarDecl *VD) { 11418 ASTContext &C = CGM.getContext(); 11419 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11420 if (I == LastprivateConditionalToTypes.end()) 11421 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11422 QualType NewType; 11423 const FieldDecl *VDField; 11424 const FieldDecl *FiredField; 11425 LValue BaseLVal; 11426 auto VI = I->getSecond().find(VD); 11427 if (VI == I->getSecond().end()) { 11428 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11429 RD->startDefinition(); 11430 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11431 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11432 RD->completeDefinition(); 11433 NewType = C.getRecordType(RD); 11434 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11435 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11436 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11437 } else { 11438 NewType = std::get<0>(VI->getSecond()); 11439 VDField = std::get<1>(VI->getSecond()); 11440 FiredField = std::get<2>(VI->getSecond()); 11441 BaseLVal = std::get<3>(VI->getSecond()); 11442 } 11443 LValue FiredLVal = 11444 CGF.EmitLValueForField(BaseLVal, FiredField); 11445 CGF.EmitStoreOfScalar( 11446 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11447 FiredLVal); 11448 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11449 } 11450 11451 namespace { 11452 /// Checks if the lastprivate conditional variable is referenced in LHS. 11453 class LastprivateConditionalRefChecker final 11454 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11455 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11456 const Expr *FoundE = nullptr; 11457 const Decl *FoundD = nullptr; 11458 StringRef UniqueDeclName; 11459 LValue IVLVal; 11460 llvm::Function *FoundFn = nullptr; 11461 SourceLocation Loc; 11462 11463 public: 11464 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11465 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11466 llvm::reverse(LPM)) { 11467 auto It = D.DeclToUniqueName.find(E->getDecl()); 11468 if (It == D.DeclToUniqueName.end()) 11469 continue; 11470 if (D.Disabled) 11471 return false; 11472 FoundE = E; 11473 FoundD = E->getDecl()->getCanonicalDecl(); 11474 UniqueDeclName = It->second; 11475 IVLVal = D.IVLVal; 11476 FoundFn = D.Fn; 11477 break; 11478 } 11479 return FoundE == E; 11480 } 11481 bool VisitMemberExpr(const MemberExpr *E) { 11482 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11483 return false; 11484 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11485 llvm::reverse(LPM)) { 11486 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11487 if (It == D.DeclToUniqueName.end()) 11488 continue; 11489 if (D.Disabled) 11490 return false; 11491 FoundE = E; 11492 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11493 UniqueDeclName = It->second; 11494 IVLVal = D.IVLVal; 11495 FoundFn = D.Fn; 11496 break; 11497 } 11498 return FoundE == E; 11499 } 11500 bool VisitStmt(const Stmt *S) { 11501 for (const Stmt *Child : S->children()) { 11502 if (!Child) 11503 continue; 11504 if (const auto *E = dyn_cast<Expr>(Child)) 11505 if (!E->isGLValue()) 11506 continue; 11507 if (Visit(Child)) 11508 return true; 11509 } 11510 return false; 11511 } 11512 explicit LastprivateConditionalRefChecker( 11513 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11514 : LPM(LPM) {} 11515 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11516 getFoundData() const { 11517 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11518 } 11519 }; 11520 } // namespace 11521 11522 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11523 LValue IVLVal, 11524 StringRef UniqueDeclName, 11525 LValue LVal, 11526 SourceLocation Loc) { 11527 // Last updated loop counter for the lastprivate conditional var. 11528 // int<xx> last_iv = 0; 11529 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11530 llvm::Constant *LastIV = 11531 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11532 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11533 IVLVal.getAlignment().getAsAlign()); 11534 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11535 11536 // Last value of the lastprivate conditional. 
11537 // decltype(priv_a) last_a; 11538 llvm::Constant *Last = getOrCreateInternalVariable( 11539 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11540 cast<llvm::GlobalVariable>(Last)->setAlignment( 11541 LVal.getAlignment().getAsAlign()); 11542 LValue LastLVal = 11543 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11544 11545 // Global loop counter. Required to handle inner parallel-for regions. 11546 // iv 11547 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11548 11549 // #pragma omp critical(a) 11550 // if (last_iv <= iv) { 11551 // last_iv = iv; 11552 // last_a = priv_a; 11553 // } 11554 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11555 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11556 Action.Enter(CGF); 11557 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11558 // (last_iv <= iv) ? Check if the variable is updated and store new 11559 // value in global var. 11560 llvm::Value *CmpRes; 11561 if (IVLVal.getType()->isSignedIntegerType()) { 11562 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11563 } else { 11564 assert(IVLVal.getType()->isUnsignedIntegerType() && 11565 "Loop iteration variable must be integer."); 11566 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11567 } 11568 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11569 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11570 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11571 // { 11572 CGF.EmitBlock(ThenBB); 11573 11574 // last_iv = iv; 11575 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11576 11577 // last_a = priv_a; 11578 switch (CGF.getEvaluationKind(LVal.getType())) { 11579 case TEK_Scalar: { 11580 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11581 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11582 break; 11583 } 11584 case TEK_Complex: { 11585 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11586 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11587 break; 11588 } 11589 case TEK_Aggregate: 11590 llvm_unreachable( 11591 "Aggregates are not supported in lastprivate conditional."); 11592 } 11593 // } 11594 CGF.EmitBranch(ExitBB); 11595 // There is no need to emit line number for unconditional branch. 11596 (void)ApplyDebugLocation::CreateEmpty(CGF); 11597 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11598 }; 11599 11600 if (CGM.getLangOpts().OpenMPSimd) { 11601 // Do not emit as a critical region as no parallel region could be emitted. 11602 RegionCodeGenTy ThenRCG(CodeGen); 11603 ThenRCG(CGF); 11604 } else { 11605 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11606 } 11607 } 11608 11609 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11610 const Expr *LHS) { 11611 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11612 return; 11613 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11614 if (!Checker.Visit(LHS)) 11615 return; 11616 const Expr *FoundE; 11617 const Decl *FoundD; 11618 StringRef UniqueDeclName; 11619 LValue IVLVal; 11620 llvm::Function *FoundFn; 11621 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11622 Checker.getFoundData(); 11623 if (FoundFn != CGF.CurFn) { 11624 // Special codegen for inner parallel regions. 
11625 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11626 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11627 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11628 "Lastprivate conditional is not found in outer region."); 11629 QualType StructTy = std::get<0>(It->getSecond()); 11630 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11631 LValue PrivLVal = CGF.EmitLValue(FoundE); 11632 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11633 PrivLVal.getAddress(CGF), 11634 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11635 LValue BaseLVal = 11636 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11637 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11638 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11639 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11640 FiredLVal, llvm::AtomicOrdering::Unordered, 11641 /*IsVolatile=*/true, /*isInit=*/false); 11642 return; 11643 } 11644 11645 // Private address of the lastprivate conditional in the current context. 
11646 // priv_a 11647 LValue LVal = CGF.EmitLValue(FoundE); 11648 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11649 FoundE->getExprLoc()); 11650 } 11651 11652 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11653 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11654 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11655 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11656 return; 11657 auto Range = llvm::reverse(LastprivateConditionalStack); 11658 auto It = llvm::find_if( 11659 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11660 if (It == Range.end() || It->Fn != CGF.CurFn) 11661 return; 11662 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11663 assert(LPCI != LastprivateConditionalToTypes.end() && 11664 "Lastprivates must be registered already."); 11665 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11666 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11667 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11668 for (const auto &Pair : It->DeclToUniqueName) { 11669 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11670 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11671 continue; 11672 auto I = LPCI->getSecond().find(Pair.first); 11673 assert(I != LPCI->getSecond().end() && 11674 "Lastprivate must be rehistered already."); 11675 // bool Cmp = priv_a.Fired != 0; 11676 LValue BaseLVal = std::get<3>(I->getSecond()); 11677 LValue FiredLVal = 11678 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11679 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11680 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11681 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11682 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11683 // if (Cmp) { 11684 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11685 CGF.EmitBlock(ThenBB); 
11686 Address Addr = CGF.GetAddrOfLocalVar(VD); 11687 LValue LVal; 11688 if (VD->getType()->isReferenceType()) 11689 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11690 AlignmentSource::Decl); 11691 else 11692 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11693 AlignmentSource::Decl); 11694 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11695 D.getBeginLoc()); 11696 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11697 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11698 // } 11699 } 11700 } 11701 11702 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11703 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11704 SourceLocation Loc) { 11705 if (CGF.getLangOpts().OpenMP < 50) 11706 return; 11707 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11708 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11709 "Unknown lastprivate conditional variable."); 11710 StringRef UniqueName = It->second; 11711 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11712 // The variable was not updated in the region - exit. 
11713 if (!GV) 11714 return; 11715 LValue LPLVal = CGF.MakeAddrLValue( 11716 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11717 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11718 CGF.EmitStoreOfScalar(Res, PrivLVal); 11719 } 11720 11721 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11722 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11723 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11724 llvm_unreachable("Not supported in SIMD-only mode"); 11725 } 11726 11727 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11728 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11729 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11730 llvm_unreachable("Not supported in SIMD-only mode"); 11731 } 11732 11733 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11734 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11735 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11736 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11737 bool Tied, unsigned &NumberOfParts) { 11738 llvm_unreachable("Not supported in SIMD-only mode"); 11739 } 11740 11741 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11742 SourceLocation Loc, 11743 llvm::Function *OutlinedFn, 11744 ArrayRef<llvm::Value *> CapturedVars, 11745 const Expr *IfCond) { 11746 llvm_unreachable("Not supported in SIMD-only mode"); 11747 } 11748 11749 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11750 CodeGenFunction &CGF, StringRef CriticalName, 11751 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11752 const Expr *Hint) { 11753 llvm_unreachable("Not supported in SIMD-only mode"); 11754 } 11755 11756 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11757 const RegionCodeGenTy &MasterOpGen, 11758 SourceLocation Loc) { 11759 llvm_unreachable("Not supported in SIMD-only mode"); 11760 } 11761 11762 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11763 SourceLocation Loc) { 11764 llvm_unreachable("Not supported in SIMD-only mode"); 11765 } 11766 11767 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11768 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11769 SourceLocation Loc) { 11770 llvm_unreachable("Not supported in SIMD-only mode"); 11771 } 11772 11773 void CGOpenMPSIMDRuntime::emitSingleRegion( 11774 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11775 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11776 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11777 ArrayRef<const Expr *> AssignmentOps) { 11778 llvm_unreachable("Not supported in SIMD-only mode"); 11779 } 11780 11781 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11782 const RegionCodeGenTy &OrderedOpGen, 11783 SourceLocation Loc, 11784 bool IsThreads) { 11785 llvm_unreachable("Not supported in SIMD-only mode"); 11786 } 11787 11788 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11789 SourceLocation Loc, 11790 OpenMPDirectiveKind Kind, 11791 bool EmitChecks, 11792 bool ForceSimpleCall) { 11793 llvm_unreachable("Not supported in SIMD-only mode"); 11794 } 11795 11796 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11797 CodeGenFunction &CGF, SourceLocation Loc, 11798 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11799 bool Ordered, const DispatchRTInput &DispatchValues) { 11800 llvm_unreachable("Not supported in SIMD-only mode"); 11801 } 11802 11803 void CGOpenMPSIMDRuntime::emitForStaticInit( 11804 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11805 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11806 llvm_unreachable("Not supported in SIMD-only mode"); 11807 } 11808 11809 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11810 CodeGenFunction &CGF, SourceLocation Loc, 11811 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11812 llvm_unreachable("Not supported in SIMD-only mode"); 11813 } 11814 11815 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11816 SourceLocation Loc, 11817 unsigned IVSize, 11818 bool IVSigned) { 11819 llvm_unreachable("Not supported in SIMD-only mode"); 11820 } 11821 11822 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11823 SourceLocation Loc, 11824 OpenMPDirectiveKind DKind) { 11825 llvm_unreachable("Not supported in SIMD-only mode"); 11826 } 11827 11828 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11829 SourceLocation Loc, 11830 unsigned IVSize, bool IVSigned, 11831 Address IL, Address LB, 11832 Address UB, Address ST) { 11833 llvm_unreachable("Not supported in SIMD-only mode"); 11834 } 11835 11836 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11837 llvm::Value *NumThreads, 11838 SourceLocation Loc) { 11839 llvm_unreachable("Not supported in SIMD-only mode"); 11840 } 11841 11842 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11843 ProcBindKind ProcBind, 11844 SourceLocation Loc) { 11845 llvm_unreachable("Not supported in SIMD-only mode"); 11846 } 11847 11848 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11849 const VarDecl *VD, 11850 Address VDAddr, 11851 SourceLocation Loc) { 11852 llvm_unreachable("Not supported in SIMD-only mode"); 11853 } 11854 11855 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11856 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11857 CodeGenFunction *CGF) { 11858 llvm_unreachable("Not supported in SIMD-only mode"); 11859 } 11860 11861 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11862 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11863 llvm_unreachable("Not supported in SIMD-only mode"); 11864 } 11865 11866 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11867 ArrayRef<const Expr *> 
Vars, 11868 SourceLocation Loc, 11869 llvm::AtomicOrdering AO) { 11870 llvm_unreachable("Not supported in SIMD-only mode"); 11871 } 11872 11873 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11874 const OMPExecutableDirective &D, 11875 llvm::Function *TaskFunction, 11876 QualType SharedsTy, Address Shareds, 11877 const Expr *IfCond, 11878 const OMPTaskDataTy &Data) { 11879 llvm_unreachable("Not supported in SIMD-only mode"); 11880 } 11881 11882 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11883 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11884 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11885 const Expr *IfCond, const OMPTaskDataTy &Data) { 11886 llvm_unreachable("Not supported in SIMD-only mode"); 11887 } 11888 11889 void CGOpenMPSIMDRuntime::emitReduction( 11890 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11891 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11892 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11893 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11894 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11895 ReductionOps, Options); 11896 } 11897 11898 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11899 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11900 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11901 llvm_unreachable("Not supported in SIMD-only mode"); 11902 } 11903 11904 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 11905 SourceLocation Loc, 11906 bool IsWorksharingReduction) { 11907 llvm_unreachable("Not supported in SIMD-only mode"); 11908 } 11909 11910 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11911 SourceLocation Loc, 11912 ReductionCodeGen &RCG, 11913 unsigned N) { 11914 llvm_unreachable("Not supported in SIMD-only mode"); 11915 } 11916 
// Remaining CGOpenMPSIMDRuntime entry points. In -fopenmp-simd mode only
// simd constructs are code-generated, so every operation that would need the
// libomp runtime library is unreachable; the only exception below is
// emitTargetGlobal, which reports that no target-specific handling applies.

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No target regions exist in SIMD-only mode, so no global ever needs
// offload-specific emission; report "not handled here".
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}