1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType();
  if (needCleanups(N)) {
    // Re-type the storage to the private variable's memory type before
    // registering the destroy cleanup.
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Follows pointer and reference levels of \p BaseLV until the element type
/// is reached.
///
/// Starting with \p BaseTy (its reference stripped), each iteration emits a
/// load through the current pointer or reference and moves on to the pointee
/// type. The walk stops as soon as the current type is neither a pointer nor a
/// reference, or is the same type as \p ElTy.
///
/// \returns An lvalue whose address is the resulting address bitcast to the
/// memory type of \p ElTy, carrying the type, base info and TBAA info of the
/// last lvalue reached.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Not a pointer, so this level is a reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wraps \p Addr in as many levels of indirection as lie between \p BaseTy and
/// \p ElTy.
///
/// For every pointer/reference level of \p BaseTy (walked as in loadToBegin) a
/// memory temporary is created, and each temporary's address is stored into
/// the temporary of the level above it. \p Addr is cast and stored into the
/// innermost temporary and the outermost temporary is returned. When there is
/// no such level, \p Addr is cast to \p BaseLVType and returned with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Temporary of the current level.
  Address TopTmp = Address::invalid();     // Temporary of the previous level.
  Address MostTopTmp = Address::invalid(); // Temporary of the first level.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast to the element type of the innermost temporary when one exists,
  // otherwise to the type requested by the caller.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
1039 CGF.EHStack.pushTerminate(); 1040 CodeGen(CGF); 1041 CGF.EHStack.popTerminate(); 1042 } 1043 1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1045 CodeGenFunction &CGF) { 1046 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1047 getThreadIDVariable()->getType(), 1048 AlignmentSource::Decl); 1049 } 1050 1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1052 QualType FieldTy) { 1053 auto *Field = FieldDecl::Create( 1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1057 Field->setAccess(AS_public); 1058 DC->addDecl(Field); 1059 return Field; 1060 } 1061 1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1063 StringRef Separator) 1064 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1065 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1066 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1067 1068 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1069 OMPBuilder.initialize(); 1070 loadOffloadInfoMetadata(); 1071 } 1072 1073 void CGOpenMPRuntime::clear() { 1074 InternalVars.clear(); 1075 // Clean non-target variable declarations possibly used only in debug info. 
for (const auto &Data : EmittedNonTargetVariables) {
    // Skip entries whose value has already been deleted.
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only unused declarations are removed; definitions and anything that is
    // still referenced stay in the module.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Joins \p Parts into a single name: FirstSeparator is written before the
/// first part and Separator before every following part.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emits an internal helper function for a user-defined reduction.
///
/// \param Ty                  Type of the reduction; both parameters of the
///                            helper are restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the body of the helper;
///                            may be null.
/// \param In                  Variable mapped to the pointee of the helper's
///                            second parameter.
/// \param Out                 Variable mapped to the pointee of the helper's
///                            first parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise the
///                            helper is named "omp_initializer" and a
///                            non-trivial initializer of \p Out is emitted
///                            before \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note that the 'out' parameter is pushed first below.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimization enabled, replace noinline/optnone by alwaysinline on
  // the helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helpers first run the non-trivial initializer attached to the
  // 'Out' variable, if there is one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner and, if the declaration has an initializer, the
/// initializer helper for declare-reduction declaration \p D and caches both
/// in UDRMap. Does nothing when \p D is already cached.
///
/// \param CGF When non-null, \p D is additionally recorded in FunctionUDRMap
///            under CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1164 /*IsCombiner=*/true); 1165 llvm::Function *Initializer = nullptr; 1166 if (const Expr *Init = D->getInitializer()) { 1167 Initializer = emitCombinerOrInitializer( 1168 CGM, D->getType(), 1169 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1170 : nullptr, 1171 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1173 /*IsCombiner=*/false); 1174 } 1175 UDRMap.try_emplace(D, Combiner, Initializer); 1176 if (CGF) { 1177 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1178 Decls.second.push_back(D); 1179 } 1180 } 1181 1182 std::pair<llvm::Function *, llvm::Function *> 1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1184 auto I = UDRMap.find(D); 1185 if (I != UDRMap.end()) 1186 return I->second; 1187 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1188 return UDRMap.lookup(D); 1189 } 1190 1191 namespace { 1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1193 // Builder if one is present. 1194 struct PushAndPopStackRAII { 1195 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1196 bool HasCancel) 1197 : OMPBuilder(OMPBuilder) { 1198 if (!OMPBuilder) 1199 return; 1200 1201 // The following callback is the crucial part of clangs cleanup process. 1202 // 1203 // NOTE: 1204 // Once the OpenMPIRBuilder is used to create parallel regions (and 1205 // similar), the cancellation destination (Dest below) is determined via 1206 // IP. That means if we have variables to finalize we split the block at IP, 1207 // use the new block (=BB) as destination to build a JumpDest (via 1208 // getJumpDestInCurrentScope(BB)) which then is fed to 1209 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1210 // to push & pop an FinalizationInfo object. 
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel(); 1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1256 HasCancel = OPFD->hasCancel(); 1257 else if (const auto *OPFD = 1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else if (const auto *OPFD = 1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1265 // parallel region to make cancellation barriers work properly. 1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1269 HasCancel, OutlinedHelperName); 1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1272 } 1273 1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1275 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1276 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1278 return emitParallelOrTeamsOutlinedFunction( 1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1280 } 1281 1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1283 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1285 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1286 return emitParallelOrTeamsOutlinedFunction( 1287 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1293 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1294 bool Tied, unsigned &NumberOfParts) { 1295 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1296 PrePostActionTy &) { 1297 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1298 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1299 llvm::Value *TaskArgs[] = { 1300 UpLoc, ThreadID, 1301 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1302 TaskTVar->getType()->castAs<PointerType>()) 1303 .getPointer(CGF)}; 1304 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1305 CGM.getModule(), OMPRTL___kmpc_omp_task), 1306 TaskArgs); 1307 }; 1308 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1309 UntiedCodeGen); 1310 CodeGen.setAction(Action); 1311 assert(!ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 for tasks"); 1313 const OpenMPDirectiveKind Region = 1314 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1315 : OMPD_task; 1316 const CapturedStmt *CS = D.getCapturedStmt(Region); 1317 bool HasCancel = false; 1318 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1319 HasCancel = TD->hasCancel(); 1320 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1321 HasCancel = TD->hasCancel(); 1322 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1323 HasCancel = TD->hasCancel(); 1324 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 1327 CodeGenFunction CGF(CGM, true); 1328 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1329 InnermostKind, HasCancel, Action); 1330 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1331 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1332 if (!Tied) 1333 NumberOfParts = Action.getNumberOfParts(); 1334 return Res; 1335 } 1336 1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1338 
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                        Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Returns the global thread id to use at \p Loc in the current function.
///
/// The value is obtained, in this order of preference, from:
///  - the OpenMPIRBuilder, when the OpenMPIRBuilder language option is set;
///  - the thread id already cached for the current function;
///  - a load of the thread id variable of the enclosing OpenMP region, when
///    the conditions checked below allow it;
///  - a call to __kmpc_global_thread_num emitted at the function's service
///    insert point, which is then cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The load is used when no landing pad is required or exceptions are
      // disabled, when emitting into the entry block, or when the thread id
      // pointer is not an instruction or is defined in the entry block or in
      // the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the current
  // insert point afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drops all state this runtime keeps for the function that \p CGF has just
/// finished emitting: the thread id cache entry and its service insert point,
/// the user-defined reductions and mappers recorded for the function together
/// with their map entries, and the per-function lastprivate-conditional and
/// untied-task-stack entries.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

/// Returns the ident pointer type held by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Returns a pointer to the kmpc_micro function type, building and caching
/// the function type in Kmpc_MicroTy on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1540 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1541 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1542 } 1543 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1544 } 1545 1546 llvm::FunctionCallee 1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1548 assert((IVSize == 32 || IVSize == 64) && 1549 "IV size is not compatible with the omp runtime"); 1550 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1551 : "__kmpc_for_static_init_4u") 1552 : (IVSigned ? "__kmpc_for_static_init_8" 1553 : "__kmpc_for_static_init_8u"); 1554 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1555 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1556 llvm::Type *TypeParams[] = { 1557 getIdentTyPointerTy(), // loc 1558 CGM.Int32Ty, // tid 1559 CGM.Int32Ty, // schedtype 1560 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1561 PtrTy, // p_lower 1562 PtrTy, // p_upper 1563 PtrTy, // p_stride 1564 ITy, // incr 1565 ITy // chunk 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1579 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1581 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1582 CGM.Int32Ty, // tid 1583 CGM.Int32Ty, // schedtype 1584 ITy, // lower 1585 ITy, // upper 1586 ITy, // stride 1587 ITy // chunk 1588 }; 1589 auto *FnTy = 1590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1591 return CGM.CreateRuntimeFunction(FnTy, Name); 1592 } 1593 1594 llvm::FunctionCallee 1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1596 assert((IVSize == 32 || IVSize == 64) && 1597 "IV size is not compatible with the omp runtime"); 1598 StringRef Name = 1599 IVSize == 32 1600 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1601 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1602 llvm::Type *TypeParams[] = { 1603 getIdentTyPointerTy(), // loc 1604 CGM.Int32Ty, // tid 1605 }; 1606 auto *FnTy = 1607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1608 return CGM.CreateRuntimeFunction(FnTy, Name); 1609 } 1610 1611 llvm::FunctionCallee 1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1613 assert((IVSize == 32 || IVSize == 64) && 1614 "IV size is not compatible with the omp runtime"); 1615 StringRef Name = 1616 IVSize == 32 1617 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1618 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1619 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1621 llvm::Type *TypeParams[] = { 1622 getIdentTyPointerTy(), // loc 1623 CGM.Int32Ty, // tid 1624 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1625 PtrTy, // p_lower 1626 PtrTy, // p_upper 1627 PtrTy // p_stride 1628 }; 1629 auto *FnTy = 1630 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1631 return CGM.CreateRuntimeFunction(FnTy, Name); 1632 } 1633 1634 /// Obtain information that uniquely identifies a target entry. This 1635 /// consists of the file and device IDs as well as line number associated with 1636 /// the relevant entry source location. 1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1638 unsigned &DeviceID, unsigned &FileID, 1639 unsigned &LineNum) { 1640 SourceManager &SM = C.getSourceManager(); 1641 1642 // The loc should be always valid and have a file ID (the user cannot use 1643 // #pragma directives in macros) 1644 1645 assert(Loc.isValid() && "Source location is expected to be always valid."); 1646 1647 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1648 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1649 1650 llvm::sys::fs::UniqueID ID; 1651 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1652 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1653 << PLoc.getFilename() << EC.message(); 1654 1655 DeviceID = ID.getDevice(); 1656 FileID = ID.getFile(); 1657 LineNum = PLoc.getLine(); 1658 } 1659 1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1661 if (CGM.getLangOpts().OpenMPSimd) 1662 return Address::invalid(); 1663 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1664 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1665 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1666 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1667 HasRequiresUnifiedSharedMemory))) { 1668 SmallString<64> PtrName; 1669 { 1670 
llvm::raw_svector_ostream OS(PtrName); 1671 OS << CGM.getMangledName(GlobalDecl(VD)); 1672 if (!VD->isExternallyVisible()) { 1673 unsigned DeviceID, FileID, Line; 1674 getTargetEntryUniqueInfo(CGM.getContext(), 1675 VD->getCanonicalDecl()->getBeginLoc(), 1676 DeviceID, FileID, Line); 1677 OS << llvm::format("_%x", FileID); 1678 } 1679 OS << "_decl_tgt_ref_ptr"; 1680 } 1681 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1682 if (!Ptr) { 1683 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1684 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1685 PtrName); 1686 1687 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1688 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1689 1690 if (!CGM.getLangOpts().OpenMPIsDevice) 1691 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1692 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1693 } 1694 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1695 } 1696 return Address::invalid(); 1697 } 1698 1699 llvm::Constant * 1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1701 assert(!CGM.getLangOpts().OpenMPUseTLS || 1702 !CGM.getContext().getTargetInfo().isTLSSupported()); 1703 // Lookup the entry, lazily creating it if necessary. 
1704 std::string Suffix = getName({"cache", ""}); 1705 return getOrCreateInternalVariable( 1706 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1707 } 1708 1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1710 const VarDecl *VD, 1711 Address VDAddr, 1712 SourceLocation Loc) { 1713 if (CGM.getLangOpts().OpenMPUseTLS && 1714 CGM.getContext().getTargetInfo().isTLSSupported()) 1715 return VDAddr; 1716 1717 llvm::Type *VarTy = VDAddr.getElementType(); 1718 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1719 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1720 CGM.Int8PtrTy), 1721 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1722 getOrCreateThreadPrivateCache(VD)}; 1723 return Address(CGF.EmitRuntimeCall( 1724 OMPBuilder.getOrCreateRuntimeFunction( 1725 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1726 Args), 1727 VDAddr.getAlignment()); 1728 } 1729 1730 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1731 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1732 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1733 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1734 // library. 1735 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1736 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1737 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1738 OMPLoc); 1739 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1740 // to register constructor/destructor for variable. 
1741 llvm::Value *Args[] = { 1742 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1743 Ctor, CopyCtor, Dtor}; 1744 CGF.EmitRuntimeCall( 1745 OMPBuilder.getOrCreateRuntimeFunction( 1746 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1747 Args); 1748 } 1749 1750 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1751 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1752 bool PerformInit, CodeGenFunction *CGF) { 1753 if (CGM.getLangOpts().OpenMPUseTLS && 1754 CGM.getContext().getTargetInfo().isTLSSupported()) 1755 return nullptr; 1756 1757 VD = VD->getDefinition(CGM.getContext()); 1758 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1759 QualType ASTTy = VD->getType(); 1760 1761 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1762 const Expr *Init = VD->getAnyInitializer(); 1763 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1764 // Generate function that re-emits the declaration's initializer into the 1765 // threadprivate copy of the variable VD 1766 CodeGenFunction CtorCGF(CGM); 1767 FunctionArgList Args; 1768 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1769 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1770 ImplicitParamDecl::Other); 1771 Args.push_back(&Dst); 1772 1773 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1774 CGM.getContext().VoidPtrTy, Args); 1775 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1776 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1777 llvm::Function *Fn = 1778 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1779 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1780 Args, Loc, Loc); 1781 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1782 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1783 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1784 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1785 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1786 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1787 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1788 /*IsInitializer=*/true); 1789 ArgVal = CtorCGF.EmitLoadOfScalar( 1790 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1791 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1792 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1793 CtorCGF.FinishFunction(); 1794 Ctor = Fn; 1795 } 1796 if (VD->getType().isDestructedType() != QualType::DK_none) { 1797 // Generate function that emits destructor call for the threadprivate copy 1798 // of the variable VD 1799 CodeGenFunction DtorCGF(CGM); 1800 FunctionArgList Args; 1801 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1802 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1803 ImplicitParamDecl::Other); 1804 Args.push_back(&Dst); 1805 1806 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1807 CGM.getContext().VoidTy, Args); 1808 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1809 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1810 llvm::Function *Fn = 1811 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1812 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1813 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1814 Loc, Loc); 1815 // Create a scope with an artificial location for the body of this function. 1816 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1817 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1818 DtorCGF.GetAddrOfLocalVar(&Dst), 1819 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1820 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1821 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1822 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1823 DtorCGF.FinishFunction(); 1824 Dtor = Fn; 1825 } 1826 // Do not emit init function if it is not required. 
1827 if (!Ctor && !Dtor) 1828 return nullptr; 1829 1830 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1831 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1832 /*isVarArg=*/false) 1833 ->getPointerTo(); 1834 // Copying constructor for the threadprivate variable. 1835 // Must be NULL - reserved by runtime, but currently it requires that this 1836 // parameter is always NULL. Otherwise it fires assertion. 1837 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1838 if (Ctor == nullptr) { 1839 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1840 /*isVarArg=*/false) 1841 ->getPointerTo(); 1842 Ctor = llvm::Constant::getNullValue(CtorTy); 1843 } 1844 if (Dtor == nullptr) { 1845 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1846 /*isVarArg=*/false) 1847 ->getPointerTo(); 1848 Dtor = llvm::Constant::getNullValue(DtorTy); 1849 } 1850 if (!CGF) { 1851 auto *InitFunctionTy = 1852 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1853 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1854 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1855 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1856 CodeGenFunction InitCGF(CGM); 1857 FunctionArgList ArgList; 1858 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1859 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1860 Loc, Loc); 1861 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1862 InitCGF.FinishFunction(); 1863 return InitFunction; 1864 } 1865 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1866 } 1867 return nullptr; 1868 } 1869 1870 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1871 llvm::GlobalVariable *Addr, 1872 bool PerformInit) { 1873 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1874 !CGM.getLangOpts().OpenMPIsDevice) 1875 return false; 1876 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1877 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1878 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1879 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1880 HasRequiresUnifiedSharedMemory)) 1881 return CGM.getLangOpts().OpenMPIsDevice; 1882 VD = VD->getDefinition(CGM.getContext()); 1883 assert(VD && "Unknown VarDecl"); 1884 1885 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1886 return CGM.getLangOpts().OpenMPIsDevice; 1887 1888 QualType ASTTy = VD->getType(); 1889 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1890 1891 // Produce the unique prefix to identify the new target regions. We use 1892 // the source location of the variable declaration which we know to not 1893 // conflict with any target region. 1894 unsigned DeviceID; 1895 unsigned FileID; 1896 unsigned Line; 1897 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1898 SmallString<128> Buffer, Out; 1899 { 1900 llvm::raw_svector_ostream OS(Buffer); 1901 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1902 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1903 } 1904 1905 const Expr *Init = VD->getAnyInitializer(); 1906 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1907 llvm::Constant *Ctor; 1908 llvm::Constant *ID; 1909 if (CGM.getLangOpts().OpenMPIsDevice) { 1910 // Generate function that re-emits the declaration's initializer into 1911 // the threadprivate copy of the variable VD 1912 CodeGenFunction CtorCGF(CGM); 1913 1914 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1915 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1916 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1917 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1918 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1919 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1920 FunctionArgList(), Loc, Loc); 1921 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1922 CtorCGF.EmitAnyExprToMem(Init, 1923 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1924 Init->getType().getQualifiers(), 1925 /*IsInitializer=*/true); 1926 CtorCGF.FinishFunction(); 1927 Ctor = Fn; 1928 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1929 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1930 } else { 1931 Ctor = new llvm::GlobalVariable( 1932 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1933 llvm::GlobalValue::PrivateLinkage, 1934 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1935 ID = Ctor; 1936 } 1937 1938 // Register the information for the entry associated with the constructor. 1939 Out.clear(); 1940 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1941 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1942 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1943 } 1944 if (VD->getType().isDestructedType() != QualType::DK_none) { 1945 llvm::Constant *Dtor; 1946 llvm::Constant *ID; 1947 if (CGM.getLangOpts().OpenMPIsDevice) { 1948 // Generate function that emits destructor call for the threadprivate 1949 // copy of the variable VD 1950 CodeGenFunction DtorCGF(CGM); 1951 1952 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1953 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1954 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1955 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1956 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1957 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1958 FunctionArgList(), Loc, Loc); 1959 // Create a scope with an artificial location for the body of this 1960 // function. 
1961 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1962 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1963 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1964 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1965 DtorCGF.FinishFunction(); 1966 Dtor = Fn; 1967 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1968 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1969 } else { 1970 Dtor = new llvm::GlobalVariable( 1971 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1972 llvm::GlobalValue::PrivateLinkage, 1973 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1974 ID = Dtor; 1975 } 1976 // Register the information for the entry associated with the destructor. 1977 Out.clear(); 1978 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1979 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1980 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1981 } 1982 return CGM.getLangOpts().OpenMPIsDevice; 1983 } 1984 1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1986 QualType VarType, 1987 StringRef Name) { 1988 std::string Suffix = getName({"artificial", ""}); 1989 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1990 llvm::Value *GAddr = 1991 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1992 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1993 CGM.getTarget().isTLSSupported()) { 1994 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1995 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 1996 } 1997 std::string CacheSuffix = getName({"cache", ""}); 1998 llvm::Value *Args[] = { 1999 emitUpdateLocation(CGF, SourceLocation()), 2000 getThreadID(CGF, SourceLocation()), 2001 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2002 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2003 /*isSigned=*/false), 2004 
getOrCreateInternalVariable( 2005 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2006 return Address( 2007 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2008 CGF.EmitRuntimeCall( 2009 OMPBuilder.getOrCreateRuntimeFunction( 2010 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2011 Args), 2012 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2013 CGM.getContext().getTypeAlignInChars(VarType)); 2014 } 2015 2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2017 const RegionCodeGenTy &ThenGen, 2018 const RegionCodeGenTy &ElseGen) { 2019 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2020 2021 // If the condition constant folds and can be elided, try to avoid emitting 2022 // the condition and the dead arm of the if/else. 2023 bool CondConstant; 2024 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2025 if (CondConstant) 2026 ThenGen(CGF); 2027 else 2028 ElseGen(CGF); 2029 return; 2030 } 2031 2032 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2033 // emit the conditional branch. 2034 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2035 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2036 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2037 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2038 2039 // Emit the 'then' code. 2040 CGF.EmitBlock(ThenBlock); 2041 ThenGen(CGF); 2042 CGF.EmitBranch(ContBlock); 2043 // Emit the 'else' code if present. 2044 // There is no need to emit line number for unconditional branch. 2045 (void)ApplyDebugLocation::CreateEmpty(CGF); 2046 CGF.EmitBlock(ElseBlock); 2047 ElseGen(CGF); 2048 // There is no need to emit line number for unconditional branch. 2049 (void)ApplyDebugLocation::CreateEmpty(CGF); 2050 CGF.EmitBranch(ContBlock); 2051 // Emit the continuation block for code after the if. 
2052 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2053 } 2054 2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2056 llvm::Function *OutlinedFn, 2057 ArrayRef<llvm::Value *> CapturedVars, 2058 const Expr *IfCond) { 2059 if (!CGF.HaveInsertPoint()) 2060 return; 2061 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2062 auto &M = CGM.getModule(); 2063 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2064 this](CodeGenFunction &CGF, PrePostActionTy &) { 2065 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2066 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2067 llvm::Value *Args[] = { 2068 RTLoc, 2069 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2070 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2071 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2072 RealArgs.append(std::begin(Args), std::end(Args)); 2073 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2074 2075 llvm::FunctionCallee RTLFn = 2076 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2077 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2078 }; 2079 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2082 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2083 // Build calls: 2084 // __kmpc_serialized_parallel(&Loc, GTid); 2085 llvm::Value *Args[] = {RTLoc, ThreadID}; 2086 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2087 M, OMPRTL___kmpc_serialized_parallel), 2088 Args); 2089 2090 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2091 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2092 Address ZeroAddrBound = 2093 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2094 /*Name=*/".bound.zero.addr"); 2095 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2096 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2097 // ThreadId 
for serialized parallels is 0. 2098 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2099 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2100 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2101 2102 // Ensure we do not inline the function. This is trivially true for the ones 2103 // passed to __kmpc_fork_call but the ones calles in serialized regions 2104 // could be inlined. This is not a perfect but it is closer to the invariant 2105 // we want, namely, every data environment starts with a new function. 2106 // TODO: We should pass the if condition to the runtime function and do the 2107 // handling there. Much cleaner code. 2108 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2109 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2110 2111 // __kmpc_end_serialized_parallel(&Loc, GTid); 2112 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2113 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2114 M, OMPRTL___kmpc_end_serialized_parallel), 2115 EndArgs); 2116 }; 2117 if (IfCond) { 2118 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2119 } else { 2120 RegionCodeGenTy ThenRCG(ThenGen); 2121 ThenRCG(CGF); 2122 } 2123 } 2124 2125 // If we're inside an (outlined) parallel region, use the region info's 2126 // thread-ID variable (it is passed in a first argument of the outlined function 2127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2128 // regular serial code region, get thread ID by calling kmp_int32 2129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2130 // return the address of that temp. 
2131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2132 SourceLocation Loc) { 2133 if (auto *OMPRegionInfo = 2134 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2135 if (OMPRegionInfo->getThreadIDVariable()) 2136 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2137 2138 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2139 QualType Int32Ty = 2140 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2141 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2142 CGF.EmitStoreOfScalar(ThreadID, 2143 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2144 2145 return ThreadIDTemp; 2146 } 2147 2148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2149 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2150 SmallString<256> Buffer; 2151 llvm::raw_svector_ostream Out(Buffer); 2152 Out << Name; 2153 StringRef RuntimeName = Out.str(); 2154 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2155 if (Elem.second) { 2156 assert(Elem.second->getType()->getPointerElementType() == Ty && 2157 "OMP internal variable has different type than requested"); 2158 return &*Elem.second; 2159 } 2160 2161 return Elem.second = new llvm::GlobalVariable( 2162 CGM.getModule(), Ty, /*IsConstant*/ false, 2163 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2164 Elem.first(), /*InsertBefore=*/nullptr, 2165 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2166 } 2167 2168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2169 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2170 std::string Name = getName({Prefix, "var"}); 2171 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2172 } 2173 2174 namespace { 2175 /// Common pre(post)-action for different OpenMP constructs. 
2176 class CommonActionTy final : public PrePostActionTy { 2177 llvm::FunctionCallee EnterCallee; 2178 ArrayRef<llvm::Value *> EnterArgs; 2179 llvm::FunctionCallee ExitCallee; 2180 ArrayRef<llvm::Value *> ExitArgs; 2181 bool Conditional; 2182 llvm::BasicBlock *ContBlock = nullptr; 2183 2184 public: 2185 CommonActionTy(llvm::FunctionCallee EnterCallee, 2186 ArrayRef<llvm::Value *> EnterArgs, 2187 llvm::FunctionCallee ExitCallee, 2188 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2189 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2190 ExitArgs(ExitArgs), Conditional(Conditional) {} 2191 void Enter(CodeGenFunction &CGF) override { 2192 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2193 if (Conditional) { 2194 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2195 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2196 ContBlock = CGF.createBasicBlock("omp_if.end"); 2197 // Generate the branch (If-stmt) 2198 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2199 CGF.EmitBlock(ThenBlock); 2200 } 2201 } 2202 void Done(CodeGenFunction &CGF) { 2203 // Emit the rest of blocks/branches 2204 CGF.EmitBranch(ContBlock); 2205 CGF.EmitBlock(ContBlock, true); 2206 } 2207 void Exit(CodeGenFunction &CGF) override { 2208 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2209 } 2210 }; 2211 } // anonymous namespace 2212 2213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2214 StringRef CriticalName, 2215 const RegionCodeGenTy &CriticalOpGen, 2216 SourceLocation Loc, const Expr *Hint) { 2217 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2218 // CriticalOpGen(); 2219 // __kmpc_end_critical(ident_t *, gtid, Lock); 2220 // Prepare arguments and build a call to __kmpc_critical 2221 if (!CGF.HaveInsertPoint()) 2222 return; 2223 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2224 getCriticalRegionLock(CriticalName)}; 2225 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2226 std::end(Args)); 2227 if (Hint) { 2228 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2229 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2230 } 2231 CommonActionTy Action( 2232 OMPBuilder.getOrCreateRuntimeFunction( 2233 CGM.getModule(), 2234 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2235 EnterArgs, 2236 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2237 OMPRTL___kmpc_end_critical), 2238 Args); 2239 CriticalOpGen.setAction(Action); 2240 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2241 } 2242 2243 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2244 const RegionCodeGenTy &MasterOpGen, 2245 SourceLocation Loc) { 2246 if (!CGF.HaveInsertPoint()) 2247 return; 2248 // if(__kmpc_master(ident_t *, gtid)) { 2249 // MasterOpGen(); 2250 // __kmpc_end_master(ident_t *, gtid); 2251 // } 2252 // Prepare arguments and build a call to __kmpc_master 2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2254 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2255 CGM.getModule(), OMPRTL___kmpc_master), 2256 Args, 2257 OMPBuilder.getOrCreateRuntimeFunction( 2258 CGM.getModule(), OMPRTL___kmpc_end_master), 2259 Args, 2260 /*Conditional=*/true); 2261 MasterOpGen.setAction(Action); 2262 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2263 Action.Done(CGF); 2264 } 2265 2266 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2267 SourceLocation Loc) { 2268 if (!CGF.HaveInsertPoint()) 2269 return; 2270 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2271 OMPBuilder.createTaskyield(CGF.Builder); 2272 } else { 2273 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2274 llvm::Value *Args[] = { 2275 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2276 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2277 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2278 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2279 Args); 2280 } 2281 2282 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2283 Region->emitUntiedSwitch(CGF); 2284 } 2285 2286 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2287 const RegionCodeGenTy &TaskgroupOpGen, 2288 SourceLocation Loc) { 2289 if (!CGF.HaveInsertPoint()) 2290 return; 2291 // __kmpc_taskgroup(ident_t *, gtid); 2292 // TaskgroupOpGen(); 2293 // __kmpc_end_taskgroup(ident_t *, gtid); 2294 // Prepare arguments and build a call to __kmpc_taskgroup 2295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2296 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2297 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2298 Args, 2299 OMPBuilder.getOrCreateRuntimeFunction( 2300 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2301 Args); 2302 TaskgroupOpGen.setAction(Action); 2303 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2304 } 2305 2306 /// Given an array of pointers to variables, project the address of a 2307 /// given variable. 2308 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2309 unsigned Index, const VarDecl *Var) { 2310 // Pull out the pointer to the variable. 
2311 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2312 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2313 2314 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2315 Addr = CGF.Builder.CreateElementBitCast( 2316 Addr, CGF.ConvertTypeForMem(Var->getType())); 2317 return Addr; 2318 } 2319 2320 static llvm::Value *emitCopyprivateCopyFunction( 2321 CodeGenModule &CGM, llvm::Type *ArgsType, 2322 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2323 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2324 SourceLocation Loc) { 2325 ASTContext &C = CGM.getContext(); 2326 // void copy_func(void *LHSArg, void *RHSArg); 2327 FunctionArgList Args; 2328 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2329 ImplicitParamDecl::Other); 2330 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2331 ImplicitParamDecl::Other); 2332 Args.push_back(&LHSArg); 2333 Args.push_back(&RHSArg); 2334 const auto &CGFI = 2335 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2336 std::string Name = 2337 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2338 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2339 llvm::GlobalValue::InternalLinkage, Name, 2340 &CGM.getModule()); 2341 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2342 Fn->setDoesNotRecurse(); 2343 CodeGenFunction CGF(CGM); 2344 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2345 // Dest = (void*[n])(LHSArg); 2346 // Src = (void*[n])(RHSArg); 2347 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2348 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2349 ArgsType), CGF.getPointerAlign()); 2350 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2351 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2352 ArgsType), CGF.getPointerAlign()); 2353 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 2354 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2355 // ... 2356 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2357 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2358 const auto *DestVar = 2359 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2360 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2361 2362 const auto *SrcVar = 2363 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2364 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2365 2366 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2367 QualType Type = VD->getType(); 2368 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2369 } 2370 CGF.FinishFunction(); 2371 return Fn; 2372 } 2373 2374 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2375 const RegionCodeGenTy &SingleOpGen, 2376 SourceLocation Loc, 2377 ArrayRef<const Expr *> CopyprivateVars, 2378 ArrayRef<const Expr *> SrcExprs, 2379 ArrayRef<const Expr *> DstExprs, 2380 ArrayRef<const Expr *> AssignmentOps) { 2381 if (!CGF.HaveInsertPoint()) 2382 return; 2383 assert(CopyprivateVars.size() == SrcExprs.size() && 2384 CopyprivateVars.size() == DstExprs.size() && 2385 CopyprivateVars.size() == AssignmentOps.size()); 2386 ASTContext &C = CGM.getContext(); 2387 // int32 did_it = 0; 2388 // if(__kmpc_single(ident_t *, gtid)) { 2389 // SingleOpGen(); 2390 // __kmpc_end_single(ident_t *, gtid); 2391 // did_it = 1; 2392 // } 2393 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2394 // <copy_func>, did_it); 2395 2396 Address DidIt = Address::invalid(); 2397 if (!CopyprivateVars.empty()) { 2398 // int32 did_it = 0; 2399 QualType KmpInt32Ty = 2400 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2401 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2402 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2403 } 2404 // Prepare arguments and build a call to __kmpc_single 2405 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2406 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2407 CGM.getModule(), OMPRTL___kmpc_single), 2408 Args, 2409 OMPBuilder.getOrCreateRuntimeFunction( 2410 CGM.getModule(), OMPRTL___kmpc_end_single), 2411 Args, 2412 /*Conditional=*/true); 2413 SingleOpGen.setAction(Action); 2414 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2415 if (DidIt.isValid()) { 2416 // did_it = 1; 2417 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2418 } 2419 Action.Done(CGF); 2420 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2421 // <copy_func>, did_it); 2422 if (DidIt.isValid()) { 2423 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2424 QualType CopyprivateArrayTy = C.getConstantArrayType( 2425 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2426 /*IndexTypeQuals=*/0); 2427 // Create a list of all private variables for copyprivate. 2428 Address CopyprivateList = 2429 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2430 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2431 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2432 CGF.Builder.CreateStore( 2433 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2434 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2435 CGF.VoidPtrTy), 2436 Elem); 2437 } 2438 // Build function that copies private values from single region to all other 2439 // threads in the corresponding parallel region. 
2440 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2441 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2442 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2443 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2444 Address CL = 2445 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2446 CGF.VoidPtrTy); 2447 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2448 llvm::Value *Args[] = { 2449 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2450 getThreadID(CGF, Loc), // i32 <gtid> 2451 BufSize, // size_t <buf_size> 2452 CL.getPointer(), // void *<copyprivate list> 2453 CpyFn, // void (*) (void *, void *) <copy_func> 2454 DidItVal // i32 did_it 2455 }; 2456 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2457 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2458 Args); 2459 } 2460 } 2461 2462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2463 const RegionCodeGenTy &OrderedOpGen, 2464 SourceLocation Loc, bool IsThreads) { 2465 if (!CGF.HaveInsertPoint()) 2466 return; 2467 // __kmpc_ordered(ident_t *, gtid); 2468 // OrderedOpGen(); 2469 // __kmpc_end_ordered(ident_t *, gtid); 2470 // Prepare arguments and build a call to __kmpc_ordered 2471 if (IsThreads) { 2472 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2473 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2474 CGM.getModule(), OMPRTL___kmpc_ordered), 2475 Args, 2476 OMPBuilder.getOrCreateRuntimeFunction( 2477 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2478 Args); 2479 OrderedOpGen.setAction(Action); 2480 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2481 return; 2482 } 2483 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2484 } 2485 2486 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2487 unsigned Flags; 2488 if (Kind == OMPD_for) 2489 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2490 else if (Kind == OMPD_sections) 2491 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2492 else if (Kind == OMPD_single) 2493 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2494 else if (Kind == OMPD_barrier) 2495 Flags = OMP_IDENT_BARRIER_EXPL; 2496 else 2497 Flags = OMP_IDENT_BARRIER_IMPL; 2498 return Flags; 2499 } 2500 2501 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2502 CodeGenFunction &CGF, const OMPLoopDirective &S, 2503 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2504 // Check if the loop directive is actually a doacross loop directive. In this 2505 // case choose static, 1 schedule. 2506 if (llvm::any_of( 2507 S.getClausesOfKind<OMPOrderedClause>(), 2508 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2509 ScheduleKind = OMPC_SCHEDULE_static; 2510 // Chunk size is 1 in this case. 2511 llvm::APInt ChunkSize(32, 1); 2512 ChunkExpr = IntegerLiteral::Create( 2513 CGF.getContext(), ChunkSize, 2514 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2515 SourceLocation()); 2516 } 2517 } 2518 2519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2520 OpenMPDirectiveKind Kind, bool EmitChecks, 2521 bool ForceSimpleCall) { 2522 // Check if we should use the OMPBuilder 2523 auto *OMPRegionInfo = 2524 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2525 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2526 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2527 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2528 return; 2529 } 2530 2531 if (!CGF.HaveInsertPoint()) 2532 return; 2533 // Build call __kmpc_cancel_barrier(loc, thread_id); 2534 // Build call __kmpc_barrier(loc, thread_id); 2535 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2536 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2537 // thread_id); 2538 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2539 getThreadID(CGF, Loc)}; 2540 if (OMPRegionInfo) { 2541 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2542 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2543 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2544 OMPRTL___kmpc_cancel_barrier), 2545 Args); 2546 if (EmitChecks) { 2547 // if (__kmpc_cancel_barrier()) { 2548 // exit from construct; 2549 // } 2550 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2551 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2552 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2553 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2554 CGF.EmitBlock(ExitBB); 2555 // exit from construct; 2556 CodeGenFunction::JumpDest CancelDestination = 2557 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2558 CGF.EmitBranchThroughCleanup(CancelDestination); 2559 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2560 } 2561 return; 2562 } 2563 } 2564 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2565 CGM.getModule(), OMPRTL___kmpc_barrier), 2566 Args); 2567 } 2568 2569 /// Map the OpenMP loop schedule to the runtime enumeration. 2570 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2571 bool Chunked, bool Ordered) { 2572 switch (ScheduleKind) { 2573 case OMPC_SCHEDULE_static: 2574 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2575 : (Ordered ? OMP_ord_static : OMP_sch_static); 2576 case OMPC_SCHEDULE_dynamic: 2577 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2578 case OMPC_SCHEDULE_guided: 2579 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2580 case OMPC_SCHEDULE_runtime: 2581 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2582 case OMPC_SCHEDULE_auto: 2583 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2584 case OMPC_SCHEDULE_unknown: 2585 assert(!Chunked && "chunk was specified but schedule kind not known"); 2586 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2587 } 2588 llvm_unreachable("Unexpected runtime schedule"); 2589 } 2590 2591 /// Map the OpenMP distribute schedule to the runtime enumeration. 2592 static OpenMPSchedType 2593 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2594 // only static is allowed for dist_schedule 2595 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2596 } 2597 2598 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2599 bool Chunked) const { 2600 OpenMPSchedType Schedule = 2601 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2602 return Schedule == OMP_sch_static; 2603 } 2604 2605 bool CGOpenMPRuntime::isStaticNonchunked( 2606 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2607 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2608 return Schedule == OMP_dist_sch_static; 2609 } 2610 2611 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2612 bool Chunked) const { 2613 OpenMPSchedType Schedule = 2614 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2615 return Schedule == OMP_sch_static_chunked; 2616 } 2617 2618 bool CGOpenMPRuntime::isStaticChunked( 2619 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2620 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2621 return Schedule == OMP_dist_sch_static_chunked; 2622 } 2623 2624 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2625 OpenMPSchedType Schedule = 2626 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2627 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2628 return Schedule != OMP_sch_static; 2629 } 2630 2631 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2632 OpenMPScheduleClauseModifier M1, 2633 OpenMPScheduleClauseModifier M2) { 2634 int Modifier = 0; 2635 switch (M1) { 2636 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2637 Modifier = OMP_sch_modifier_monotonic; 2638 break; 2639 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2640 Modifier = OMP_sch_modifier_nonmonotonic; 2641 break; 2642 case OMPC_SCHEDULE_MODIFIER_simd: 2643 if (Schedule == OMP_sch_static_chunked) 2644 Schedule = OMP_sch_static_balanced_chunked; 2645 break; 2646 case OMPC_SCHEDULE_MODIFIER_last: 2647 case OMPC_SCHEDULE_MODIFIER_unknown: 2648 break; 2649 } 2650 switch (M2) { 2651 case OMPC_SCHEDULE_MODIFIER_monotonic: 2652 Modifier = OMP_sch_modifier_monotonic; 2653 break; 2654 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2655 Modifier = OMP_sch_modifier_nonmonotonic; 2656 break; 2657 case OMPC_SCHEDULE_MODIFIER_simd: 2658 if (Schedule == OMP_sch_static_chunked) 2659 Schedule = OMP_sch_static_balanced_chunked; 2660 break; 2661 case OMPC_SCHEDULE_MODIFIER_last: 2662 case OMPC_SCHEDULE_MODIFIER_unknown: 2663 break; 2664 } 2665 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2666 // If the static schedule kind is specified or if the ordered clause is 2667 // specified, and if the nonmonotonic modifier is not specified, the effect is 2668 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2669 // modifier is specified, the effect is as if the nonmonotonic modifier is 2670 // specified. 
2671 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2672 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2673 Schedule == OMP_sch_static_balanced_chunked || 2674 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2675 Schedule == OMP_dist_sch_static_chunked || 2676 Schedule == OMP_dist_sch_static)) 2677 Modifier = OMP_sch_modifier_nonmonotonic; 2678 } 2679 return Schedule | Modifier; 2680 } 2681 2682 void CGOpenMPRuntime::emitForDispatchInit( 2683 CodeGenFunction &CGF, SourceLocation Loc, 2684 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2685 bool Ordered, const DispatchRTInput &DispatchValues) { 2686 if (!CGF.HaveInsertPoint()) 2687 return; 2688 OpenMPSchedType Schedule = getRuntimeSchedule( 2689 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2690 assert(Ordered || 2691 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2692 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2693 Schedule != OMP_sch_static_balanced_chunked)); 2694 // Call __kmpc_dispatch_init( 2695 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2696 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2697 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2698 2699 // If the Chunk was not specified in the clause - use default value 1. 2700 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2701 : CGF.Builder.getIntN(IVSize, 1); 2702 llvm::Value *Args[] = { 2703 emitUpdateLocation(CGF, Loc), 2704 getThreadID(CGF, Loc), 2705 CGF.Builder.getInt32(addMonoNonMonoModifier( 2706 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2707 DispatchValues.LB, // Lower 2708 DispatchValues.UB, // Upper 2709 CGF.Builder.getIntN(IVSize, 1), // Stride 2710 Chunk // Chunk 2711 }; 2712 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2713 } 2714 2715 static void emitForStaticInitCall( 2716 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2717 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2718 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2719 const CGOpenMPRuntime::StaticRTInput &Values) { 2720 if (!CGF.HaveInsertPoint()) 2721 return; 2722 2723 assert(!Values.Ordered); 2724 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2725 Schedule == OMP_sch_static_balanced_chunked || 2726 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2727 Schedule == OMP_dist_sch_static || 2728 Schedule == OMP_dist_sch_static_chunked); 2729 2730 // Call __kmpc_for_static_init( 2731 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2732 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2733 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2734 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2735 llvm::Value *Chunk = Values.Chunk; 2736 if (Chunk == nullptr) { 2737 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2738 Schedule == OMP_dist_sch_static) && 2739 "expected static non-chunked schedule"); 2740 // If the Chunk was not specified in the clause - use default value 1. 
2741 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2742 } else { 2743 assert((Schedule == OMP_sch_static_chunked || 2744 Schedule == OMP_sch_static_balanced_chunked || 2745 Schedule == OMP_ord_static_chunked || 2746 Schedule == OMP_dist_sch_static_chunked) && 2747 "expected static chunked schedule"); 2748 } 2749 llvm::Value *Args[] = { 2750 UpdateLocation, 2751 ThreadId, 2752 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2753 M2)), // Schedule type 2754 Values.IL.getPointer(), // &isLastIter 2755 Values.LB.getPointer(), // &LB 2756 Values.UB.getPointer(), // &UB 2757 Values.ST.getPointer(), // &Stride 2758 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2759 Chunk // Chunk 2760 }; 2761 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2762 } 2763 2764 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2765 SourceLocation Loc, 2766 OpenMPDirectiveKind DKind, 2767 const OpenMPScheduleTy &ScheduleKind, 2768 const StaticRTInput &Values) { 2769 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2770 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2771 assert(isOpenMPWorksharingDirective(DKind) && 2772 "Expected loop-based or sections-based directive."); 2773 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2774 isOpenMPLoopDirective(DKind) 2775 ? 
OMP_IDENT_WORK_LOOP 2776 : OMP_IDENT_WORK_SECTIONS); 2777 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2778 llvm::FunctionCallee StaticInitFunction = 2779 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2780 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2781 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2782 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2783 } 2784 2785 void CGOpenMPRuntime::emitDistributeStaticInit( 2786 CodeGenFunction &CGF, SourceLocation Loc, 2787 OpenMPDistScheduleClauseKind SchedKind, 2788 const CGOpenMPRuntime::StaticRTInput &Values) { 2789 OpenMPSchedType ScheduleNum = 2790 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2791 llvm::Value *UpdatedLocation = 2792 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2793 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2794 llvm::FunctionCallee StaticInitFunction = 2795 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2796 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2797 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2798 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2799 } 2800 2801 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2802 SourceLocation Loc, 2803 OpenMPDirectiveKind DKind) { 2804 if (!CGF.HaveInsertPoint()) 2805 return; 2806 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2807 llvm::Value *Args[] = { 2808 emitUpdateLocation(CGF, Loc, 2809 isOpenMPDistributeDirective(DKind) 2810 ? OMP_IDENT_WORK_DISTRIBUTE 2811 : isOpenMPLoopDirective(DKind) 2812 ? 
OMP_IDENT_WORK_LOOP 2813 : OMP_IDENT_WORK_SECTIONS), 2814 getThreadID(CGF, Loc)}; 2815 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2816 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2817 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2818 Args); 2819 } 2820 2821 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2822 SourceLocation Loc, 2823 unsigned IVSize, 2824 bool IVSigned) { 2825 if (!CGF.HaveInsertPoint()) 2826 return; 2827 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2828 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2829 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2830 } 2831 2832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2833 SourceLocation Loc, unsigned IVSize, 2834 bool IVSigned, Address IL, 2835 Address LB, Address UB, 2836 Address ST) { 2837 // Call __kmpc_dispatch_next( 2838 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2839 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2840 // kmp_int[32|64] *p_stride); 2841 llvm::Value *Args[] = { 2842 emitUpdateLocation(CGF, Loc), 2843 getThreadID(CGF, Loc), 2844 IL.getPointer(), // &isLastIter 2845 LB.getPointer(), // &Lower 2846 UB.getPointer(), // &Upper 2847 ST.getPointer() // &Stride 2848 }; 2849 llvm::Value *Call = 2850 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2851 return CGF.EmitScalarConversion( 2852 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2853 CGF.getContext().BoolTy, Loc); 2854 } 2855 2856 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2857 llvm::Value *NumThreads, 2858 SourceLocation Loc) { 2859 if (!CGF.HaveInsertPoint()) 2860 return; 2861 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2862 llvm::Value *Args[] = { 2863 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2864 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2865 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2866 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2867 Args); 2868 } 2869 2870 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2871 ProcBindKind ProcBind, 2872 SourceLocation Loc) { 2873 if (!CGF.HaveInsertPoint()) 2874 return; 2875 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2876 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2877 llvm::Value *Args[] = { 2878 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2879 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2880 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2881 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2882 Args); 2883 } 2884 2885 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2886 SourceLocation Loc, llvm::AtomicOrdering AO) { 2887 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2888 OMPBuilder.createFlush(CGF.Builder); 2889 } else { 2890 if (!CGF.HaveInsertPoint()) 2891 return; 2892 // Build call void __kmpc_flush(ident_t *loc) 2893 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2894 CGM.getModule(), OMPRTL___kmpc_flush), 2895 emitUpdateLocation(CGF, Loc)); 2896 } 2897 } 2898 2899 namespace { 2900 /// Indexes of fields for type kmp_task_t. 2901 enum KmpTaskTFields { 2902 /// List of shared variables. 2903 KmpTaskTShareds, 2904 /// Task routine. 2905 KmpTaskTRoutine, 2906 /// Partition id for the untied tasks. 2907 KmpTaskTPartId, 2908 /// Function with call of destructors for private variables. 2909 Data1, 2910 /// Task priority. 2911 Data2, 2912 /// (Taskloops only) Lower bound. 2913 KmpTaskTLowerBound, 2914 /// (Taskloops only) Upper bound. 2915 KmpTaskTUpperBound, 2916 /// (Taskloops only) Stride. 2917 KmpTaskTStride, 2918 /// (Taskloops only) Is last iteration flag. 2919 KmpTaskTLastIter, 2920 /// (Taskloops only) Reduction data. 
2921 KmpTaskTReductions, 2922 }; 2923 } // anonymous namespace 2924 2925 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2926 return OffloadEntriesTargetRegion.empty() && 2927 OffloadEntriesDeviceGlobalVar.empty(); 2928 } 2929 2930 /// Initialize target region entry. 2931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2932 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2933 StringRef ParentName, unsigned LineNum, 2934 unsigned Order) { 2935 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2936 "only required for the device " 2937 "code generation."); 2938 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2939 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2940 OMPTargetRegionEntryTargetRegion); 2941 ++OffloadingEntriesNum; 2942 } 2943 2944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2945 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2946 StringRef ParentName, unsigned LineNum, 2947 llvm::Constant *Addr, llvm::Constant *ID, 2948 OMPTargetRegionEntryKind Flags) { 2949 // If we are emitting code for a target, the entry is already initialized, 2950 // only has to be registered. 2951 if (CGM.getLangOpts().OpenMPIsDevice) { 2952 // This could happen if the device compilation is invoked standalone. 
2953 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2954 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2955 OffloadingEntriesNum); 2956 auto &Entry = 2957 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2958 Entry.setAddress(Addr); 2959 Entry.setID(ID); 2960 Entry.setFlags(Flags); 2961 } else { 2962 if (Flags == 2963 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2964 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2965 /*IgnoreAddressId*/ true)) 2966 return; 2967 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2968 "Target region entry already registered!"); 2969 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2970 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2971 ++OffloadingEntriesNum; 2972 } 2973 } 2974 2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2976 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2977 bool IgnoreAddressId) const { 2978 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2979 if (PerDevice == OffloadEntriesTargetRegion.end()) 2980 return false; 2981 auto PerFile = PerDevice->second.find(FileID); 2982 if (PerFile == PerDevice->second.end()) 2983 return false; 2984 auto PerParentName = PerFile->second.find(ParentName); 2985 if (PerParentName == PerFile->second.end()) 2986 return false; 2987 auto PerLine = PerParentName->second.find(LineNum); 2988 if (PerLine == PerParentName->second.end()) 2989 return false; 2990 // Fail if this entry is already registered. 
2991 if (!IgnoreAddressId && 2992 (PerLine->second.getAddress() || PerLine->second.getID())) 2993 return false; 2994 return true; 2995 } 2996 2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2998 const OffloadTargetRegionEntryInfoActTy &Action) { 2999 // Scan all target region entries and perform the provided action. 3000 for (const auto &D : OffloadEntriesTargetRegion) 3001 for (const auto &F : D.second) 3002 for (const auto &P : F.second) 3003 for (const auto &L : P.second) 3004 Action(D.first, F.first, P.first(), L.first, L.second); 3005 } 3006 3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3008 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3009 OMPTargetGlobalVarEntryKind Flags, 3010 unsigned Order) { 3011 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3012 "only required for the device " 3013 "code generation."); 3014 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3015 ++OffloadingEntriesNum; 3016 } 3017 3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3019 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3020 CharUnits VarSize, 3021 OMPTargetGlobalVarEntryKind Flags, 3022 llvm::GlobalValue::LinkageTypes Linkage) { 3023 if (CGM.getLangOpts().OpenMPIsDevice) { 3024 // This could happen if the device compilation is invoked standalone. 
3025 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3026 initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); 3027 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3028 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3029 "Resetting with the new address."); 3030 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3031 if (Entry.getVarSize().isZero()) { 3032 Entry.setVarSize(VarSize); 3033 Entry.setLinkage(Linkage); 3034 } 3035 return; 3036 } 3037 Entry.setVarSize(VarSize); 3038 Entry.setLinkage(Linkage); 3039 Entry.setAddress(Addr); 3040 } else { 3041 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3042 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3043 assert(Entry.isValid() && Entry.getFlags() == Flags && 3044 "Entry not initialized!"); 3045 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3046 "Resetting with the new address."); 3047 if (Entry.getVarSize().isZero()) { 3048 Entry.setVarSize(VarSize); 3049 Entry.setLinkage(Linkage); 3050 } 3051 return; 3052 } 3053 OffloadEntriesDeviceGlobalVar.try_emplace( 3054 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3055 ++OffloadingEntriesNum; 3056 } 3057 } 3058 3059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3060 actOnDeviceGlobalVarEntriesInfo( 3061 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3062 // Scan all target region entries and perform the provided action. 3063 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3064 Action(E.getKey(), E.getValue()); 3065 } 3066 3067 void CGOpenMPRuntime::createOffloadEntry( 3068 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3069 llvm::GlobalValue::LinkageTypes Linkage) { 3070 StringRef Name = Addr->getName(); 3071 llvm::Module &M = CGM.getModule(); 3072 llvm::LLVMContext &C = M.getContext(); 3073 3074 // Create constant string with the name. 
3075 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3076 3077 std::string StringName = getName({"omp_offloading", "entry_name"}); 3078 auto *Str = new llvm::GlobalVariable( 3079 M, StrPtrInit->getType(), /*isConstant=*/true, 3080 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3081 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3082 3083 llvm::Constant *Data[] = { 3084 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3085 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3086 llvm::ConstantInt::get(CGM.SizeTy, Size), 3087 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3088 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3089 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3090 llvm::GlobalVariable *Entry = createGlobalStruct( 3091 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3092 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3093 3094 // The entry has to be created in the section the linker expects it to be. 3095 Entry->setSection("omp_offloading_entries"); 3096 } 3097 3098 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3099 // Emit the offloading entries and metadata so that the device codegen side 3100 // can easily figure out what to emit. The produced metadata looks like 3101 // this: 3102 // 3103 // !omp_offload.info = !{!1, ...} 3104 // 3105 // Right now we only generate metadata for function that contain target 3106 // regions. 3107 3108 // If we are in simd mode or there are no entries, we don't need to do 3109 // anything. 
3110 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3111 return; 3112 3113 llvm::Module &M = CGM.getModule(); 3114 llvm::LLVMContext &C = M.getContext(); 3115 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3116 SourceLocation, StringRef>, 3117 16> 3118 OrderedEntries(OffloadEntriesInfoManager.size()); 3119 llvm::SmallVector<StringRef, 16> ParentFunctions( 3120 OffloadEntriesInfoManager.size()); 3121 3122 // Auxiliary methods to create metadata values and strings. 3123 auto &&GetMDInt = [this](unsigned V) { 3124 return llvm::ConstantAsMetadata::get( 3125 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3126 }; 3127 3128 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3129 3130 // Create the offloading info metadata node. 3131 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3132 3133 // Create function that emits metadata for each target region entry; 3134 auto &&TargetRegionMetadataEmitter = 3135 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3136 &GetMDString]( 3137 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3138 unsigned Line, 3139 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3140 // Generate metadata for target regions. Each entry of this metadata 3141 // contains: 3142 // - Entry 0 -> Kind of this type of metadata (0). 3143 // - Entry 1 -> Device ID of the file where the entry was identified. 3144 // - Entry 2 -> File ID of the file where the entry was identified. 3145 // - Entry 3 -> Mangled name of the function where the entry was 3146 // identified. 3147 // - Entry 4 -> Line in the file where the entry was identified. 3148 // - Entry 5 -> Order the entry was created. 3149 // The first element of the metadata node is the kind. 
3150 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3151 GetMDInt(FileID), GetMDString(ParentName), 3152 GetMDInt(Line), GetMDInt(E.getOrder())}; 3153 3154 SourceLocation Loc; 3155 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3156 E = CGM.getContext().getSourceManager().fileinfo_end(); 3157 I != E; ++I) { 3158 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3159 I->getFirst()->getUniqueID().getFile() == FileID) { 3160 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3161 I->getFirst(), Line, 1); 3162 break; 3163 } 3164 } 3165 // Save this entry in the right position of the ordered entries array. 3166 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3167 ParentFunctions[E.getOrder()] = ParentName; 3168 3169 // Add metadata to the named metadata node. 3170 MD->addOperand(llvm::MDNode::get(C, Ops)); 3171 }; 3172 3173 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3174 TargetRegionMetadataEmitter); 3175 3176 // Create function that emits metadata for each device global variable entry; 3177 auto &&DeviceGlobalVarMetadataEmitter = 3178 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3179 MD](StringRef MangledName, 3180 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3181 &E) { 3182 // Generate metadata for global variables. Each entry of this metadata 3183 // contains: 3184 // - Entry 0 -> Kind of this type of metadata (1). 3185 // - Entry 1 -> Mangled name of the variable. 3186 // - Entry 2 -> Declare target kind. 3187 // - Entry 3 -> Order the entry was created. 3188 // The first element of the metadata node is the kind. 3189 llvm::Metadata *Ops[] = { 3190 GetMDInt(E.getKind()), GetMDString(MangledName), 3191 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3192 3193 // Save this entry in the right position of the ordered entries array. 
3194 OrderedEntries[E.getOrder()] = 3195 std::make_tuple(&E, SourceLocation(), MangledName); 3196 3197 // Add metadata to the named metadata node. 3198 MD->addOperand(llvm::MDNode::get(C, Ops)); 3199 }; 3200 3201 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3202 DeviceGlobalVarMetadataEmitter); 3203 3204 for (const auto &E : OrderedEntries) { 3205 assert(std::get<0>(E) && "All ordered entries must exist!"); 3206 if (const auto *CE = 3207 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3208 std::get<0>(E))) { 3209 if (!CE->getID() || !CE->getAddress()) { 3210 // Do not blame the entry if the parent funtion is not emitted. 3211 StringRef FnName = ParentFunctions[CE->getOrder()]; 3212 if (!CGM.GetGlobalValue(FnName)) 3213 continue; 3214 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3215 DiagnosticsEngine::Error, 3216 "Offloading entry for target region in %0 is incorrect: either the " 3217 "address or the ID is invalid."); 3218 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3219 continue; 3220 } 3221 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3222 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3223 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3224 OffloadEntryInfoDeviceGlobalVar>( 3225 std::get<0>(E))) { 3226 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3227 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3228 CE->getFlags()); 3229 switch (Flags) { 3230 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3231 if (CGM.getLangOpts().OpenMPIsDevice && 3232 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3233 continue; 3234 if (!CE->getAddress()) { 3235 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3236 DiagnosticsEngine::Error, "Offloading entry for declare target " 3237 "variable %0 is incorrect: the " 3238 "address is invalid."); 3239 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3240 
continue; 3241 } 3242 // The vaiable has no definition - no need to add the entry. 3243 if (CE->getVarSize().isZero()) 3244 continue; 3245 break; 3246 } 3247 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3248 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3249 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3250 "Declaret target link address is set."); 3251 if (CGM.getLangOpts().OpenMPIsDevice) 3252 continue; 3253 if (!CE->getAddress()) { 3254 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3255 DiagnosticsEngine::Error, 3256 "Offloading entry for declare target variable is incorrect: the " 3257 "address is invalid."); 3258 CGM.getDiags().Report(DiagID); 3259 continue; 3260 } 3261 break; 3262 } 3263 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3264 CE->getVarSize().getQuantity(), Flags, 3265 CE->getLinkage()); 3266 } else { 3267 llvm_unreachable("Unsupported entry kind."); 3268 } 3269 } 3270 } 3271 3272 /// Loads all the offload entries information from the host IR 3273 /// metadata. 3274 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3275 // If we are in target mode, load the metadata from the host IR. This code has 3276 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3277 3278 if (!CGM.getLangOpts().OpenMPIsDevice) 3279 return; 3280 3281 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3282 return; 3283 3284 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3285 if (auto EC = Buf.getError()) { 3286 CGM.getDiags().Report(diag::err_cannot_open_file) 3287 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3288 return; 3289 } 3290 3291 llvm::LLVMContext C; 3292 auto ME = expectedToErrorOrAndEmitErrors( 3293 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3294 3295 if (auto EC = ME.getError()) { 3296 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3297 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3298 CGM.getDiags().Report(DiagID) 3299 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3300 return; 3301 } 3302 3303 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3304 if (!MD) 3305 return; 3306 3307 for (llvm::MDNode *MN : MD->operands()) { 3308 auto &&GetMDInt = [MN](unsigned Idx) { 3309 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3310 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3311 }; 3312 3313 auto &&GetMDString = [MN](unsigned Idx) { 3314 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3315 return V->getString(); 3316 }; 3317 3318 switch (GetMDInt(0)) { 3319 default: 3320 llvm_unreachable("Unexpected metadata!"); 3321 break; 3322 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3323 OffloadingEntryInfoTargetRegion: 3324 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3325 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3326 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3327 /*Order=*/GetMDInt(5)); 3328 break; 3329 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3330 OffloadingEntryInfoDeviceGlobalVar: 3331 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3332 /*MangledName=*/GetMDString(1), 3333 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3334 /*Flags=*/GetMDInt(2)), 3335 /*Order=*/GetMDInt(3)); 3336 break; 3337 } 3338 } 3339 } 3340 3341 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3342 if (!KmpRoutineEntryPtrTy) { 3343 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3344 ASTContext &C = CGM.getContext(); 3345 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3346 FunctionProtoType::ExtProtoInfo EPI; 3347 KmpRoutineEntryPtrQTy = C.getPointerType( 3348 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3349 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3350 } 3351 } 3352 3353 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3354 // Make sure the type of the entry is already created. This is the type we 3355 // have to create: 3356 // struct __tgt_offload_entry{ 3357 // void *addr; // Pointer to the offload entry info. 3358 // // (function or global) 3359 // char *name; // Name of the function or global. 3360 // size_t size; // Size of the entry info (0 if it a function). 3361 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3362 // int32_t reserved; // Reserved, to use by the runtime library. 
3363 // }; 3364 if (TgtOffloadEntryQTy.isNull()) { 3365 ASTContext &C = CGM.getContext(); 3366 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3367 RD->startDefinition(); 3368 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3369 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3370 addFieldToRecordDecl(C, RD, C.getSizeType()); 3371 addFieldToRecordDecl( 3372 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3373 addFieldToRecordDecl( 3374 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3375 RD->completeDefinition(); 3376 RD->addAttr(PackedAttr::CreateImplicit(C)); 3377 TgtOffloadEntryQTy = C.getRecordType(RD); 3378 } 3379 return TgtOffloadEntryQTy; 3380 } 3381 3382 namespace { 3383 struct PrivateHelpersTy { 3384 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3385 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3386 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3387 PrivateElemInit(PrivateElemInit) {} 3388 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3389 const Expr *OriginalRef = nullptr; 3390 const VarDecl *Original = nullptr; 3391 const VarDecl *PrivateCopy = nullptr; 3392 const VarDecl *PrivateElemInit = nullptr; 3393 bool isLocalPrivate() const { 3394 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3395 } 3396 }; 3397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3398 } // anonymous namespace 3399 3400 static bool isAllocatableDecl(const VarDecl *VD) { 3401 const VarDecl *CVD = VD->getCanonicalDecl(); 3402 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3403 return false; 3404 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3405 // Use the default allocation. 
3406 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3407 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3408 !AA->getAllocator()); 3409 } 3410 3411 static RecordDecl * 3412 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3413 if (!Privates.empty()) { 3414 ASTContext &C = CGM.getContext(); 3415 // Build struct .kmp_privates_t. { 3416 // /* private vars */ 3417 // }; 3418 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3419 RD->startDefinition(); 3420 for (const auto &Pair : Privates) { 3421 const VarDecl *VD = Pair.second.Original; 3422 QualType Type = VD->getType().getNonReferenceType(); 3423 // If the private variable is a local variable with lvalue ref type, 3424 // allocate the pointer instead of the pointee type. 3425 if (Pair.second.isLocalPrivate()) { 3426 if (VD->getType()->isLValueReferenceType()) 3427 Type = C.getPointerType(Type); 3428 if (isAllocatableDecl(VD)) 3429 Type = C.getPointerType(Type); 3430 } 3431 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3432 if (VD->hasAttrs()) { 3433 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3434 E(VD->getAttrs().end()); 3435 I != E; ++I) 3436 FD->addAttr(*I); 3437 } 3438 } 3439 RD->completeDefinition(); 3440 return RD; 3441 } 3442 return nullptr; 3443 } 3444 3445 static RecordDecl * 3446 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3447 QualType KmpInt32Ty, 3448 QualType KmpRoutineEntryPointerQTy) { 3449 ASTContext &C = CGM.getContext(); 3450 // Build struct kmp_task_t { 3451 // void * shareds; 3452 // kmp_routine_entry_t routine; 3453 // kmp_int32 part_id; 3454 // kmp_cmplrdata_t data1; 3455 // kmp_cmplrdata_t data2; 3456 // For taskloops additional fields: 3457 // kmp_uint64 lb; 3458 // kmp_uint64 ub; 3459 // kmp_int64 st; 3460 // kmp_int32 liter; 3461 // void * reductions; 3462 // }; 3463 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3464 
UD->startDefinition(); 3465 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3466 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3467 UD->completeDefinition(); 3468 QualType KmpCmplrdataTy = C.getRecordType(UD); 3469 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3470 RD->startDefinition(); 3471 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3472 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3473 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3474 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3475 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3476 if (isOpenMPTaskLoopDirective(Kind)) { 3477 QualType KmpUInt64Ty = 3478 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3479 QualType KmpInt64Ty = 3480 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3481 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3482 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3483 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3484 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3485 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3486 } 3487 RD->completeDefinition(); 3488 return RD; 3489 } 3490 3491 static RecordDecl * 3492 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3493 ArrayRef<PrivateDataTy> Privates) { 3494 ASTContext &C = CGM.getContext(); 3495 // Build struct kmp_task_t_with_privates { 3496 // kmp_task_t task_data; 3497 // .kmp_privates_t. privates; 3498 // }; 3499 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3500 RD->startDefinition(); 3501 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3502 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3503 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3504 RD->completeDefinition(); 3505 return RD; 3506 } 3507 3508 /// Emit a proxy function which accepts kmp_task_t as the second 3509 /// argument. 
3510 /// \code 3511 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3512 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3513 /// For taskloops: 3514 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3515 /// tt->reductions, tt->shareds); 3516 /// return 0; 3517 /// } 3518 /// \endcode 3519 static llvm::Function * 3520 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3521 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3522 QualType KmpTaskTWithPrivatesPtrQTy, 3523 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3524 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3525 llvm::Value *TaskPrivatesMap) { 3526 ASTContext &C = CGM.getContext(); 3527 FunctionArgList Args; 3528 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3529 ImplicitParamDecl::Other); 3530 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3531 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3532 ImplicitParamDecl::Other); 3533 Args.push_back(&GtidArg); 3534 Args.push_back(&TaskTypeArg); 3535 const auto &TaskEntryFnInfo = 3536 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3537 llvm::FunctionType *TaskEntryTy = 3538 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3539 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3540 auto *TaskEntry = llvm::Function::Create( 3541 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3542 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3543 TaskEntry->setDoesNotRecurse(); 3544 CodeGenFunction CGF(CGM); 3545 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3546 Loc, Loc); 3547 3548 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3549 // tt, 3550 // For taskloops: 3551 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3552 // 
tt->task_data.shareds); 3553 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3554 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3555 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3556 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3557 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3558 const auto *KmpTaskTWithPrivatesQTyRD = 3559 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3560 LValue Base = 3561 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3562 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3563 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3564 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3565 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3566 3567 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3568 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3569 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3570 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3571 CGF.ConvertTypeForMem(SharedsPtrTy)); 3572 3573 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3574 llvm::Value *PrivatesParam; 3575 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3576 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3577 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3578 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3579 } else { 3580 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3581 } 3582 3583 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3584 TaskPrivatesMap, 3585 CGF.Builder 3586 .CreatePointerBitCastOrAddrSpaceCast( 3587 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3588 .getPointer()}; 3589 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3590 std::end(CommonArgs)); 3591 if (isOpenMPTaskLoopDirective(Kind)) { 3592 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3593 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3594 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3595 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3596 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3597 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3598 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3599 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3600 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3601 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3602 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3603 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3604 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3605 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3606 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3607 CallArgs.push_back(LBParam); 3608 CallArgs.push_back(UBParam); 3609 CallArgs.push_back(StParam); 3610 CallArgs.push_back(LIParam); 3611 CallArgs.push_back(RParam); 3612 } 3613 CallArgs.push_back(SharedsParam); 3614 3615 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3616 CallArgs); 3617 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3618 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3619 CGF.FinishFunction(); 3620 return TaskEntry; 3621 } 3622 3623 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3624 SourceLocation Loc, 3625 QualType KmpInt32Ty, 3626 QualType KmpTaskTWithPrivatesPtrQTy, 3627 QualType KmpTaskTWithPrivatesQTy) { 3628 ASTContext &C = CGM.getContext(); 3629 FunctionArgList Args; 3630 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3631 ImplicitParamDecl::Other); 3632 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3633 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3634 
ImplicitParamDecl::Other); 3635 Args.push_back(&GtidArg); 3636 Args.push_back(&TaskTypeArg); 3637 const auto &DestructorFnInfo = 3638 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3639 llvm::FunctionType *DestructorFnTy = 3640 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3641 std::string Name = 3642 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3643 auto *DestructorFn = 3644 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3645 Name, &CGM.getModule()); 3646 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3647 DestructorFnInfo); 3648 DestructorFn->setDoesNotRecurse(); 3649 CodeGenFunction CGF(CGM); 3650 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3651 Args, Loc, Loc); 3652 3653 LValue Base = CGF.EmitLoadOfPointerLValue( 3654 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3655 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3656 const auto *KmpTaskTWithPrivatesQTyRD = 3657 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3658 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3659 Base = CGF.EmitLValueForField(Base, *FI); 3660 for (const auto *Field : 3661 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3662 if (QualType::DestructionKind DtorKind = 3663 Field->getType().isDestructedType()) { 3664 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3665 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3666 } 3667 } 3668 CGF.FinishFunction(); 3669 return DestructorFn; 3670 } 3671 3672 /// Emit a privates mapping function for correct handling of private and 3673 /// firstprivate variables. 3674 /// \code 3675 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3676 /// **noalias priv1,..., <tyn> **noalias privn) { 3677 /// *priv1 = &.privates.priv1; 3678 /// ...; 3679 /// *privn = &.privates.privn; 3680 /// } 3681 /// \endcode 3682 static llvm::Value * 3683 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3684 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3685 ArrayRef<PrivateDataTy> Privates) { 3686 ASTContext &C = CGM.getContext(); 3687 FunctionArgList Args; 3688 ImplicitParamDecl TaskPrivatesArg( 3689 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3690 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3691 ImplicitParamDecl::Other); 3692 Args.push_back(&TaskPrivatesArg); 3693 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3694 unsigned Counter = 1; 3695 for (const Expr *E : Data.PrivateVars) { 3696 Args.push_back(ImplicitParamDecl::Create( 3697 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3698 C.getPointerType(C.getPointerType(E->getType())) 3699 .withConst() 3700 .withRestrict(), 3701 ImplicitParamDecl::Other)); 3702 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3703 PrivateVarsPos[VD] = Counter; 3704 ++Counter; 3705 } 3706 for (const Expr *E : Data.FirstprivateVars) { 3707 Args.push_back(ImplicitParamDecl::Create( 3708 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3709 C.getPointerType(C.getPointerType(E->getType())) 3710 .withConst() 3711 .withRestrict(), 3712 ImplicitParamDecl::Other)); 3713 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3714 PrivateVarsPos[VD] = Counter; 3715 ++Counter; 3716 } 3717 for (const Expr *E : Data.LastprivateVars) { 3718 Args.push_back(ImplicitParamDecl::Create( 3719 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3720 C.getPointerType(C.getPointerType(E->getType())) 3721 .withConst() 3722 .withRestrict(), 3723 ImplicitParamDecl::Other)); 3724 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3725 PrivateVarsPos[VD] = Counter; 3726 ++Counter; 3727 } 3728 for (const VarDecl *VD : 
Data.PrivateLocals) { 3729 QualType Ty = VD->getType().getNonReferenceType(); 3730 if (VD->getType()->isLValueReferenceType()) 3731 Ty = C.getPointerType(Ty); 3732 if (isAllocatableDecl(VD)) 3733 Ty = C.getPointerType(Ty); 3734 Args.push_back(ImplicitParamDecl::Create( 3735 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3736 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3737 ImplicitParamDecl::Other)); 3738 PrivateVarsPos[VD] = Counter; 3739 ++Counter; 3740 } 3741 const auto &TaskPrivatesMapFnInfo = 3742 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3743 llvm::FunctionType *TaskPrivatesMapTy = 3744 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3745 std::string Name = 3746 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3747 auto *TaskPrivatesMap = llvm::Function::Create( 3748 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3749 &CGM.getModule()); 3750 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3751 TaskPrivatesMapFnInfo); 3752 if (CGM.getLangOpts().Optimize) { 3753 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3754 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3755 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3756 } 3757 CodeGenFunction CGF(CGM); 3758 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3759 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3760 3761 // *privi = &.privates.privi; 3762 LValue Base = CGF.EmitLoadOfPointerLValue( 3763 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3764 TaskPrivatesArg.getType()->castAs<PointerType>()); 3765 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3766 Counter = 0; 3767 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3768 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3769 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3770 LValue RefLVal = 3771 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3772 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3773 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3774 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3775 ++Counter; 3776 } 3777 CGF.FinishFunction(); 3778 return TaskPrivatesMap; 3779 } 3780 3781 /// Emit initialization for private variables in task-based directives. 3782 static void emitPrivatesInit(CodeGenFunction &CGF, 3783 const OMPExecutableDirective &D, 3784 Address KmpTaskSharedsPtr, LValue TDBase, 3785 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3786 QualType SharedsTy, QualType SharedsPtrTy, 3787 const OMPTaskDataTy &Data, 3788 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3789 ASTContext &C = CGF.getContext(); 3790 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3791 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3792 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3793 ? OMPD_taskloop 3794 : OMPD_task; 3795 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3796 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3797 LValue SrcBase; 3798 bool IsTargetTask = 3799 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3800 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3801 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3802 // PointersArray, SizesArray, and MappersArray. The original variables for 3803 // these arrays are not captured and we get their addresses explicitly. 3804 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3805 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3806 SrcBase = CGF.MakeAddrLValue( 3807 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3808 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3809 SharedsTy); 3810 } 3811 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3812 for (const PrivateDataTy &Pair : Privates) { 3813 // Do not initialize private locals. 
3814 if (Pair.second.isLocalPrivate()) { 3815 ++FI; 3816 continue; 3817 } 3818 const VarDecl *VD = Pair.second.PrivateCopy; 3819 const Expr *Init = VD->getAnyInitializer(); 3820 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3821 !CGF.isTrivialInitializer(Init)))) { 3822 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3823 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3824 const VarDecl *OriginalVD = Pair.second.Original; 3825 // Check if the variable is the target-based BasePointersArray, 3826 // PointersArray, SizesArray, or MappersArray. 3827 LValue SharedRefLValue; 3828 QualType Type = PrivateLValue.getType(); 3829 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3830 if (IsTargetTask && !SharedField) { 3831 assert(isa<ImplicitParamDecl>(OriginalVD) && 3832 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3833 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3834 ->getNumParams() == 0 && 3835 isa<TranslationUnitDecl>( 3836 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3837 ->getDeclContext()) && 3838 "Expected artificial target data variable."); 3839 SharedRefLValue = 3840 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3841 } else if (ForDup) { 3842 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3843 SharedRefLValue = CGF.MakeAddrLValue( 3844 Address(SharedRefLValue.getPointer(CGF), 3845 C.getDeclAlign(OriginalVD)), 3846 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3847 SharedRefLValue.getTBAAInfo()); 3848 } else if (CGF.LambdaCaptureFields.count( 3849 Pair.second.Original->getCanonicalDecl()) > 0 || 3850 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3851 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3852 } else { 3853 // Processing for implicitly captured variables. 
3854 InlinedOpenMPRegionRAII Region( 3855 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3856 /*HasCancel=*/false); 3857 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3858 } 3859 if (Type->isArrayType()) { 3860 // Initialize firstprivate array. 3861 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3862 // Perform simple memcpy. 3863 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3864 } else { 3865 // Initialize firstprivate array using element-by-element 3866 // initialization. 3867 CGF.EmitOMPAggregateAssign( 3868 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3869 Type, 3870 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3871 Address SrcElement) { 3872 // Clean up any temporaries needed by the initialization. 3873 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3874 InitScope.addPrivate( 3875 Elem, [SrcElement]() -> Address { return SrcElement; }); 3876 (void)InitScope.Privatize(); 3877 // Emit initialization for single element. 3878 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3879 CGF, &CapturesInfo); 3880 CGF.EmitAnyExprToMem(Init, DestElement, 3881 Init->getType().getQualifiers(), 3882 /*IsInitializer=*/false); 3883 }); 3884 } 3885 } else { 3886 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3887 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3888 return SharedRefLValue.getAddress(CGF); 3889 }); 3890 (void)InitScope.Privatize(); 3891 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3892 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3893 /*capturedByInit=*/false); 3894 } 3895 } else { 3896 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3897 } 3898 } 3899 ++FI; 3900 } 3901 } 3902 3903 /// Check if duplication function is required for taskloops. 
3904 static bool checkInitIsRequired(CodeGenFunction &CGF, 3905 ArrayRef<PrivateDataTy> Privates) { 3906 bool InitRequired = false; 3907 for (const PrivateDataTy &Pair : Privates) { 3908 if (Pair.second.isLocalPrivate()) 3909 continue; 3910 const VarDecl *VD = Pair.second.PrivateCopy; 3911 const Expr *Init = VD->getAnyInitializer(); 3912 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3913 !CGF.isTrivialInitializer(Init)); 3914 if (InitRequired) 3915 break; 3916 } 3917 return InitRequired; 3918 } 3919 3920 3921 /// Emit task_dup function (for initialization of 3922 /// private/firstprivate/lastprivate vars and last_iter flag) 3923 /// \code 3924 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3925 /// lastpriv) { 3926 /// // setup lastprivate flag 3927 /// task_dst->last = lastpriv; 3928 /// // could be constructor calls here... 3929 /// } 3930 /// \endcode 3931 static llvm::Value * 3932 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3933 const OMPExecutableDirective &D, 3934 QualType KmpTaskTWithPrivatesPtrQTy, 3935 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3936 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3937 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3938 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3939 ASTContext &C = CGM.getContext(); 3940 FunctionArgList Args; 3941 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3942 KmpTaskTWithPrivatesPtrQTy, 3943 ImplicitParamDecl::Other); 3944 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3945 KmpTaskTWithPrivatesPtrQTy, 3946 ImplicitParamDecl::Other); 3947 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3948 ImplicitParamDecl::Other); 3949 Args.push_back(&DstArg); 3950 Args.push_back(&SrcArg); 3951 Args.push_back(&LastprivArg); 3952 const auto &TaskDupFnInfo = 3953 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3954 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3955 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3956 auto *TaskDup = llvm::Function::Create( 3957 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3958 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3959 TaskDup->setDoesNotRecurse(); 3960 CodeGenFunction CGF(CGM); 3961 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3962 Loc); 3963 3964 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3965 CGF.GetAddrOfLocalVar(&DstArg), 3966 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3967 // task_dst->liter = lastpriv; 3968 if (WithLastIter) { 3969 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3970 LValue Base = CGF.EmitLValueForField( 3971 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3972 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3973 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3974 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3975 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3976 } 3977 3978 // Emit initial values for private copies (if any). 
3979 assert(!Privates.empty()); 3980 Address KmpTaskSharedsPtr = Address::invalid(); 3981 if (!Data.FirstprivateVars.empty()) { 3982 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3983 CGF.GetAddrOfLocalVar(&SrcArg), 3984 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3985 LValue Base = CGF.EmitLValueForField( 3986 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3987 KmpTaskSharedsPtr = Address( 3988 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3989 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3990 KmpTaskTShareds)), 3991 Loc), 3992 CGM.getNaturalTypeAlignment(SharedsTy)); 3993 } 3994 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3995 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3996 CGF.FinishFunction(); 3997 return TaskDup; 3998 } 3999 4000 /// Checks if destructor function is required to be generated. 4001 /// \return true if cleanups are required, false otherwise. 4002 static bool 4003 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4004 ArrayRef<PrivateDataTy> Privates) { 4005 for (const PrivateDataTy &P : Privates) { 4006 if (P.second.isLocalPrivate()) 4007 continue; 4008 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4009 if (Ty.isDestructedType()) 4010 return true; 4011 } 4012 return false; 4013 } 4014 4015 namespace { 4016 /// Loop generator for OpenMP iterator expression. 
4017 class OMPIteratorGeneratorScope final 4018 : public CodeGenFunction::OMPPrivateScope { 4019 CodeGenFunction &CGF; 4020 const OMPIteratorExpr *E = nullptr; 4021 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4022 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4023 OMPIteratorGeneratorScope() = delete; 4024 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4025 4026 public: 4027 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4028 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4029 if (!E) 4030 return; 4031 SmallVector<llvm::Value *, 4> Uppers; 4032 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4033 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4034 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4035 addPrivate(VD, [&CGF, VD]() { 4036 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4037 }); 4038 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4039 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4040 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4041 "counter.addr"); 4042 }); 4043 } 4044 Privatize(); 4045 4046 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4047 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4048 LValue CLVal = 4049 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4050 HelperData.CounterVD->getType()); 4051 // Counter = 0; 4052 CGF.EmitStoreOfScalar( 4053 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4054 CLVal); 4055 CodeGenFunction::JumpDest &ContDest = 4056 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4057 CodeGenFunction::JumpDest &ExitDest = 4058 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4059 // N = <number-of_iterations>; 4060 llvm::Value *N = Uppers[I]; 4061 // cont: 4062 // if (Counter < N) goto body; else goto exit; 4063 CGF.EmitBlock(ContDest.getBlock()); 4064 auto *CVal = 4065 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4066 llvm::Value *Cmp = 4067 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4068 ? CGF.Builder.CreateICmpSLT(CVal, N) 4069 : CGF.Builder.CreateICmpULT(CVal, N); 4070 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4071 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4072 // body: 4073 CGF.EmitBlock(BodyBB); 4074 // Iteri = Begini + Counter * Stepi; 4075 CGF.EmitIgnoredExpr(HelperData.Update); 4076 } 4077 } 4078 ~OMPIteratorGeneratorScope() { 4079 if (!E) 4080 return; 4081 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4082 // Counter = Counter + 1; 4083 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4084 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4085 // goto cont; 4086 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4087 // exit: 4088 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4089 } 4090 } 4091 }; 4092 } // namespace 4093 4094 static std::pair<llvm::Value *, llvm::Value *> 4095 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4096 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4097 llvm::Value *Addr; 4098 if (OASE) { 4099 const Expr *Base = OASE->getBase(); 4100 Addr = CGF.EmitScalarExpr(Base); 4101 } else { 4102 Addr = CGF.EmitLValue(E).getPointer(CGF); 4103 } 4104 llvm::Value *SizeVal; 4105 QualType Ty = E->getType(); 4106 if (OASE) { 4107 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4108 for (const Expr *SE : OASE->getDimensions()) { 4109 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4110 Sz = CGF.EmitScalarConversion( 4111 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4112 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4113 } 4114 } else if (const auto *ASE = 4115 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4116 LValue UpAddrLVal = 4117 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4118 llvm::Value *UpAddr = 
4119 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4120 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4121 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4122 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4123 } else { 4124 SizeVal = CGF.getTypeSize(Ty); 4125 } 4126 return std::make_pair(Addr, SizeVal); 4127 } 4128 4129 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4130 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4131 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4132 if (KmpTaskAffinityInfoTy.isNull()) { 4133 RecordDecl *KmpAffinityInfoRD = 4134 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4135 KmpAffinityInfoRD->startDefinition(); 4136 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4137 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4138 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4139 KmpAffinityInfoRD->completeDefinition(); 4140 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4141 } 4142 } 4143 4144 CGOpenMPRuntime::TaskResultTy 4145 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4146 const OMPExecutableDirective &D, 4147 llvm::Function *TaskFunction, QualType SharedsTy, 4148 Address Shareds, const OMPTaskDataTy &Data) { 4149 ASTContext &C = CGM.getContext(); 4150 llvm::SmallVector<PrivateDataTy, 4> Privates; 4151 // Aggregate privates and sort them by the alignment. 
4152 const auto *I = Data.PrivateCopies.begin(); 4153 for (const Expr *E : Data.PrivateVars) { 4154 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4155 Privates.emplace_back( 4156 C.getDeclAlign(VD), 4157 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4158 /*PrivateElemInit=*/nullptr)); 4159 ++I; 4160 } 4161 I = Data.FirstprivateCopies.begin(); 4162 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4163 for (const Expr *E : Data.FirstprivateVars) { 4164 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4165 Privates.emplace_back( 4166 C.getDeclAlign(VD), 4167 PrivateHelpersTy( 4168 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4169 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4170 ++I; 4171 ++IElemInitRef; 4172 } 4173 I = Data.LastprivateCopies.begin(); 4174 for (const Expr *E : Data.LastprivateVars) { 4175 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4176 Privates.emplace_back( 4177 C.getDeclAlign(VD), 4178 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4179 /*PrivateElemInit=*/nullptr)); 4180 ++I; 4181 } 4182 for (const VarDecl *VD : Data.PrivateLocals) { 4183 if (isAllocatableDecl(VD)) 4184 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4185 else 4186 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4187 } 4188 llvm::stable_sort(Privates, 4189 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4190 return L.first > R.first; 4191 }); 4192 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4193 // Build type kmp_routine_entry_t (if not built yet). 4194 emitKmpRoutineEntryT(KmpInt32Ty); 4195 // Build type kmp_task_t (if not built yet). 
4196 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4197 if (SavedKmpTaskloopTQTy.isNull()) { 4198 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4199 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4200 } 4201 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4202 } else { 4203 assert((D.getDirectiveKind() == OMPD_task || 4204 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4205 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4206 "Expected taskloop, task or target directive"); 4207 if (SavedKmpTaskTQTy.isNull()) { 4208 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4209 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4210 } 4211 KmpTaskTQTy = SavedKmpTaskTQTy; 4212 } 4213 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4214 // Build particular struct kmp_task_t for the given task. 4215 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4216 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4217 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4218 QualType KmpTaskTWithPrivatesPtrQTy = 4219 C.getPointerType(KmpTaskTWithPrivatesQTy); 4220 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4221 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4222 KmpTaskTWithPrivatesTy->getPointerTo(); 4223 llvm::Value *KmpTaskTWithPrivatesTySize = 4224 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4225 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4226 4227 // Emit initial values for private copies (if any). 
4228 llvm::Value *TaskPrivatesMap = nullptr; 4229 llvm::Type *TaskPrivatesMapTy = 4230 std::next(TaskFunction->arg_begin(), 3)->getType(); 4231 if (!Privates.empty()) { 4232 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4233 TaskPrivatesMap = 4234 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4235 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4236 TaskPrivatesMap, TaskPrivatesMapTy); 4237 } else { 4238 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4239 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4240 } 4241 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4242 // kmp_task_t *tt); 4243 llvm::Function *TaskEntry = emitProxyTaskFunction( 4244 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4245 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4246 TaskPrivatesMap); 4247 4248 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4249 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4250 // kmp_routine_entry_t *task_entry); 4251 // Task flags. Format is taken from 4252 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4253 // description of kmp_tasking_flags struct. 4254 enum { 4255 TiedFlag = 0x1, 4256 FinalFlag = 0x2, 4257 DestructorsFlag = 0x8, 4258 PriorityFlag = 0x20, 4259 DetachableFlag = 0x40, 4260 }; 4261 unsigned Flags = Data.Tied ? TiedFlag : 0; 4262 bool NeedsCleanup = false; 4263 if (!Privates.empty()) { 4264 NeedsCleanup = 4265 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4266 if (NeedsCleanup) 4267 Flags = Flags | DestructorsFlag; 4268 } 4269 if (Data.Priority.getInt()) 4270 Flags = Flags | PriorityFlag; 4271 if (D.hasClausesOfKind<OMPDetachClause>()) 4272 Flags = Flags | DetachableFlag; 4273 llvm::Value *TaskFlags = 4274 Data.Final.getPointer() 4275 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4276 CGF.Builder.getInt32(FinalFlag), 4277 CGF.Builder.getInt32(/*C=*/0)) 4278 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4279 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4280 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4281 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4282 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4283 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4284 TaskEntry, KmpRoutineEntryPtrTy)}; 4285 llvm::Value *NewTask; 4286 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4287 // Check if we have any device clause associated with the directive. 4288 const Expr *Device = nullptr; 4289 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4290 Device = C->getDevice(); 4291 // Emit device ID if any otherwise use default value. 4292 llvm::Value *DeviceID; 4293 if (Device) 4294 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4295 CGF.Int64Ty, /*isSigned=*/true); 4296 else 4297 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4298 AllocArgs.push_back(DeviceID); 4299 NewTask = CGF.EmitRuntimeCall( 4300 OMPBuilder.getOrCreateRuntimeFunction( 4301 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4302 AllocArgs); 4303 } else { 4304 NewTask = 4305 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4306 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4307 AllocArgs); 4308 } 4309 // Emit detach clause initialization. 
4310 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4311 // task_descriptor); 4312 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4313 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4314 LValue EvtLVal = CGF.EmitLValue(Evt); 4315 4316 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4317 // int gtid, kmp_task_t *task); 4318 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4319 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4320 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4321 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4322 OMPBuilder.getOrCreateRuntimeFunction( 4323 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4324 {Loc, Tid, NewTask}); 4325 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4326 Evt->getExprLoc()); 4327 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4328 } 4329 // Process affinity clauses. 4330 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4331 // Process list of affinity data. 4332 ASTContext &C = CGM.getContext(); 4333 Address AffinitiesArray = Address::invalid(); 4334 // Calculate number of elements to form the array of affinity data. 4335 llvm::Value *NumOfElements = nullptr; 4336 unsigned NumAffinities = 0; 4337 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4338 if (const Expr *Modifier = C->getModifier()) { 4339 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4340 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4341 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4342 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4343 NumOfElements = 4344 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4345 } 4346 } else { 4347 NumAffinities += C->varlist_size(); 4348 } 4349 } 4350 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4351 // Fields ids in kmp_task_affinity_info record. 
4352 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4353 4354 QualType KmpTaskAffinityInfoArrayTy; 4355 if (NumOfElements) { 4356 NumOfElements = CGF.Builder.CreateNUWAdd( 4357 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4358 OpaqueValueExpr OVE( 4359 Loc, 4360 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4361 VK_RValue); 4362 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4363 RValue::get(NumOfElements)); 4364 KmpTaskAffinityInfoArrayTy = 4365 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4366 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4367 // Properly emit variable-sized array. 4368 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4369 ImplicitParamDecl::Other); 4370 CGF.EmitVarDecl(*PD); 4371 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4372 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4373 /*isSigned=*/false); 4374 } else { 4375 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4376 KmpTaskAffinityInfoTy, 4377 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4378 ArrayType::Normal, /*IndexTypeQuals=*/0); 4379 AffinitiesArray = 4380 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4381 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4382 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4383 /*isSigned=*/false); 4384 } 4385 4386 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4387 // Fill array by elements without iterators. 
4388 unsigned Pos = 0; 4389 bool HasIterator = false; 4390 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4391 if (C->getModifier()) { 4392 HasIterator = true; 4393 continue; 4394 } 4395 for (const Expr *E : C->varlists()) { 4396 llvm::Value *Addr; 4397 llvm::Value *Size; 4398 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4399 LValue Base = 4400 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4401 KmpTaskAffinityInfoTy); 4402 // affs[i].base_addr = &<Affinities[i].second>; 4403 LValue BaseAddrLVal = CGF.EmitLValueForField( 4404 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4405 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4406 BaseAddrLVal); 4407 // affs[i].len = sizeof(<Affinities[i].second>); 4408 LValue LenLVal = CGF.EmitLValueForField( 4409 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4410 CGF.EmitStoreOfScalar(Size, LenLVal); 4411 ++Pos; 4412 } 4413 } 4414 LValue PosLVal; 4415 if (HasIterator) { 4416 PosLVal = CGF.MakeAddrLValue( 4417 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4418 C.getSizeType()); 4419 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4420 } 4421 // Process elements with iterators. 
4422 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4423 const Expr *Modifier = C->getModifier(); 4424 if (!Modifier) 4425 continue; 4426 OMPIteratorGeneratorScope IteratorScope( 4427 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4428 for (const Expr *E : C->varlists()) { 4429 llvm::Value *Addr; 4430 llvm::Value *Size; 4431 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4432 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4433 LValue Base = CGF.MakeAddrLValue( 4434 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4435 AffinitiesArray.getAlignment()), 4436 KmpTaskAffinityInfoTy); 4437 // affs[i].base_addr = &<Affinities[i].second>; 4438 LValue BaseAddrLVal = CGF.EmitLValueForField( 4439 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4440 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4441 BaseAddrLVal); 4442 // affs[i].len = sizeof(<Affinities[i].second>); 4443 LValue LenLVal = CGF.EmitLValueForField( 4444 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4445 CGF.EmitStoreOfScalar(Size, LenLVal); 4446 Idx = CGF.Builder.CreateNUWAdd( 4447 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4448 CGF.EmitStoreOfScalar(Idx, PosLVal); 4449 } 4450 } 4451 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4452 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4453 // naffins, kmp_task_affinity_info_t *affin_list); 4454 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4455 llvm::Value *GTid = getThreadID(CGF, Loc); 4456 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4457 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4458 // FIXME: Emit the function and ignore its result for now unless the 4459 // runtime function is properly implemented. 
4460 (void)CGF.EmitRuntimeCall( 4461 OMPBuilder.getOrCreateRuntimeFunction( 4462 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4463 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4464 } 4465 llvm::Value *NewTaskNewTaskTTy = 4466 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4467 NewTask, KmpTaskTWithPrivatesPtrTy); 4468 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4469 KmpTaskTWithPrivatesQTy); 4470 LValue TDBase = 4471 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4472 // Fill the data in the resulting kmp_task_t record. 4473 // Copy shareds if there are any. 4474 Address KmpTaskSharedsPtr = Address::invalid(); 4475 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4476 KmpTaskSharedsPtr = 4477 Address(CGF.EmitLoadOfScalar( 4478 CGF.EmitLValueForField( 4479 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4480 KmpTaskTShareds)), 4481 Loc), 4482 CGM.getNaturalTypeAlignment(SharedsTy)); 4483 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4484 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4485 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4486 } 4487 // Emit initial values for private copies (if any). 4488 TaskResultTy Result; 4489 if (!Privates.empty()) { 4490 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4491 SharedsTy, SharedsPtrTy, Data, Privates, 4492 /*ForDup=*/false); 4493 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4494 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4495 Result.TaskDupFn = emitTaskDupFunction( 4496 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4497 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4498 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4499 } 4500 } 4501 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4502 enum { Priority = 0, Destructors = 1 }; 4503 // Provide pointer to function with destructors for privates. 4504 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4505 const RecordDecl *KmpCmplrdataUD = 4506 (*FI)->getType()->getAsUnionType()->getDecl(); 4507 if (NeedsCleanup) { 4508 llvm::Value *DestructorFn = emitDestructorsFunction( 4509 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4510 KmpTaskTWithPrivatesQTy); 4511 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4512 LValue DestructorsLV = CGF.EmitLValueForField( 4513 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4514 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4515 DestructorFn, KmpRoutineEntryPtrTy), 4516 DestructorsLV); 4517 } 4518 // Set priority. 4519 if (Data.Priority.getInt()) { 4520 LValue Data2LV = CGF.EmitLValueForField( 4521 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4522 LValue PriorityLV = CGF.EmitLValueForField( 4523 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4524 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4525 } 4526 Result.NewTask = NewTask; 4527 Result.TaskEntry = TaskEntry; 4528 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4529 Result.TDBase = TDBase; 4530 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4531 return Result; 4532 } 4533 4534 namespace { 4535 /// Dependence kind for RTL. 4536 enum RTLDependenceKindTy { 4537 DepIn = 0x01, 4538 DepInOut = 0x3, 4539 DepMutexInOutSet = 0x4 4540 }; 4541 /// Fields ids in kmp_depend_info record. 4542 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4543 } // namespace 4544 4545 /// Translates internal dependency kind into the runtime kind. 4546 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4547 RTLDependenceKindTy DepKind; 4548 switch (K) { 4549 case OMPC_DEPEND_in: 4550 DepKind = DepIn; 4551 break; 4552 // Out and InOut dependencies must use the same code. 
case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // The remaining kinds are not expected to reach this translation.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
///
/// \param KmpDependInfoTy [in,out] Cached record type; created on first use
/// with three fields (intptr, size_t, flags) in that order.
/// \param FlagsTy [out] Always set: an unsigned integer type as wide as the
/// target's bool.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the dependence array referenced by a depobj variable.
///
/// \param DepobjLVal LValue of the depobj variable; it is read as a pointer
/// to an array of kmp_depend_info records.
/// \return Pair (number of dependencies, lvalue of the first array element).
/// The count is read from the base_addr field of the record located one
/// element before the start of the array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one record: deps[-1] carries the element count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info records in \p DependenciesArray for every expression
/// of the dependence item \p Data.
///
/// \param Pos Position of the next free record. Either a compile-time
/// counter (unsigned *), which is incremented directly, or an lvalue holding
/// a run-time counter (LValue *), which is loaded, incremented and stored
/// back in the emitted code.
/// If \p Data has an iterator expression, the stores are emitted inside the
/// loop nest generated for it.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Locate the record to fill: constant index or run-time index.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position by one record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Emits code computing how many dependence records each depobj expression
/// of \p Data contributes.
///
/// For every expression a temporary ("depobj.size.addr") is created; the
/// element count read from the depobj is added to it inside the (optional)
/// iterator loop nest, and the temporaries are loaded once the nest is
/// closed.
/// NOTE(review): the temporary is initialized through InitTempAlloca,
/// presumably so the zero store is placed outside the iterator loops and the
/// additions accumulate across iterations — confirm.
/// \return One size value per entry of Data.DepExprs, in the same order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Scoped so that the iterator loops are closed before the sizes are
    // read back below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record: deps[-1] carries the element count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      // size += NumDeps;
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the dependence records of every depobj expression of \p Data into
/// \p DependenciesArray, starting at the run-time position stored in
/// \p PosLVal and advancing that position by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
4761 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4762 Addr.getPointer(), 4763 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4764 LValue NumDepsBase = CGF.MakeAddrLValue( 4765 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4766 Base.getBaseInfo(), Base.getTBAAInfo()); 4767 // NumDeps = deps[i].base_addr; 4768 LValue BaseAddrLVal = CGF.EmitLValueForField( 4769 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4770 llvm::Value *NumDeps = 4771 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4772 4773 // memcopy dependency data. 4774 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4775 ElSize, 4776 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4777 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4778 Address DepAddr = 4779 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4780 DependenciesArray.getAlignment()); 4781 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4782 4783 // Increase pos. 4784 // pos += size; 4785 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4786 CGF.EmitStoreOfScalar(Add, PosLVal); 4787 } 4788 } 4789 } 4790 4791 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4792 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4793 SourceLocation Loc) { 4794 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4795 return D.DepExprs.empty(); 4796 })) 4797 return std::make_pair(nullptr, Address::invalid()); 4798 // Process list of dependencies. 4799 ASTContext &C = CGM.getContext(); 4800 Address DependenciesArray = Address::invalid(); 4801 llvm::Value *NumOfElements = nullptr; 4802 unsigned NumDependencies = std::accumulate( 4803 Dependencies.begin(), Dependencies.end(), 0, 4804 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4805 return D.DepKind == OMPC_DEPEND_depobj 4806 ? V 4807 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4808 }); 4809 QualType FlagsTy; 4810 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4811 bool HasDepobjDeps = false; 4812 bool HasRegularWithIterators = false; 4813 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4814 llvm::Value *NumOfRegularWithIterators = 4815 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4816 // Calculate number of depobj dependecies and regular deps with the iterators. 4817 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4818 if (D.DepKind == OMPC_DEPEND_depobj) { 4819 SmallVector<llvm::Value *, 4> Sizes = 4820 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4821 for (llvm::Value *Size : Sizes) { 4822 NumOfDepobjElements = 4823 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4824 } 4825 HasDepobjDeps = true; 4826 continue; 4827 } 4828 // Include number of iterations, if any. 4829 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4830 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4831 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4832 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4833 NumOfRegularWithIterators = 4834 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4835 } 4836 HasRegularWithIterators = true; 4837 continue; 4838 } 4839 } 4840 4841 QualType KmpDependInfoArrayTy; 4842 if (HasDepobjDeps || HasRegularWithIterators) { 4843 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4844 /*isSigned=*/false); 4845 if (HasDepobjDeps) { 4846 NumOfElements = 4847 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4848 } 4849 if (HasRegularWithIterators) { 4850 NumOfElements = 4851 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4852 } 4853 OpaqueValueExpr OVE(Loc, 4854 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4855 VK_RValue); 4856 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4857 RValue::get(NumOfElements)); 4858 
KmpDependInfoArrayTy = 4859 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4860 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4861 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4862 // Properly emit variable-sized array. 4863 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4864 ImplicitParamDecl::Other); 4865 CGF.EmitVarDecl(*PD); 4866 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4867 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4868 /*isSigned=*/false); 4869 } else { 4870 KmpDependInfoArrayTy = C.getConstantArrayType( 4871 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4872 ArrayType::Normal, /*IndexTypeQuals=*/0); 4873 DependenciesArray = 4874 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4875 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4876 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4877 /*isSigned=*/false); 4878 } 4879 unsigned Pos = 0; 4880 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4881 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4882 Dependencies[I].IteratorExpr) 4883 continue; 4884 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4885 DependenciesArray); 4886 } 4887 // Copy regular dependecies with iterators. 4888 LValue PosLVal = CGF.MakeAddrLValue( 4889 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4890 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4891 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4892 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4893 !Dependencies[I].IteratorExpr) 4894 continue; 4895 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4896 DependenciesArray); 4897 } 4898 // Copy final depobj arrays without iterators. 
4899 if (HasDepobjDeps) { 4900 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4901 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4902 continue; 4903 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4904 DependenciesArray); 4905 } 4906 } 4907 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4908 DependenciesArray, CGF.VoidPtrTy); 4909 return std::make_pair(NumOfElements, DependenciesArray); 4910 } 4911 4912 Address CGOpenMPRuntime::emitDepobjDependClause( 4913 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4914 SourceLocation Loc) { 4915 if (Dependencies.DepExprs.empty()) 4916 return Address::invalid(); 4917 // Process list of dependencies. 4918 ASTContext &C = CGM.getContext(); 4919 Address DependenciesArray = Address::invalid(); 4920 unsigned NumDependencies = Dependencies.DepExprs.size(); 4921 QualType FlagsTy; 4922 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4923 RecordDecl *KmpDependInfoRD = 4924 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4925 4926 llvm::Value *Size; 4927 // Define type kmp_depend_info[<Dependencies.size()>]; 4928 // For depobj reserve one extra element to store the number of elements. 4929 // It is required to handle depobj(x) update(in) construct. 
4930 // kmp_depend_info[<Dependencies.size()>] deps; 4931 llvm::Value *NumDepsVal; 4932 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4933 if (const auto *IE = 4934 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4935 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4936 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4937 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4938 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4939 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4940 } 4941 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4942 NumDepsVal); 4943 CharUnits SizeInBytes = 4944 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4945 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4946 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4947 NumDepsVal = 4948 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4949 } else { 4950 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4951 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4952 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4953 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4954 Size = CGM.getSize(Sz.alignTo(Align)); 4955 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4956 } 4957 // Need to allocate on the dynamic memory. 4958 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4959 // Use default allocator. 
4960 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4961 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4962 4963 llvm::Value *Addr = 4964 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4965 CGM.getModule(), OMPRTL___kmpc_alloc), 4966 Args, ".dep.arr.addr"); 4967 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4968 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4969 DependenciesArray = Address(Addr, Align); 4970 // Write number of elements in the first element of array for depobj. 4971 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4972 // deps[i].base_addr = NumDependencies; 4973 LValue BaseAddrLVal = CGF.EmitLValueForField( 4974 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4975 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4976 llvm::PointerUnion<unsigned *, LValue *> Pos; 4977 unsigned Idx = 1; 4978 LValue PosLVal; 4979 if (Dependencies.IteratorExpr) { 4980 PosLVal = CGF.MakeAddrLValue( 4981 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4982 C.getSizeType()); 4983 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4984 /*IsInit=*/true); 4985 Pos = &PosLVal; 4986 } else { 4987 Pos = &Idx; 4988 } 4989 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4990 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4991 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4992 return DependenciesArray; 4993 } 4994 4995 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4996 SourceLocation Loc) { 4997 ASTContext &C = CGM.getContext(); 4998 QualType FlagsTy; 4999 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5000 LValue Base = CGF.EmitLoadOfPointerLValue( 5001 DepobjLVal.getAddress(CGF), 5002 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5003 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5004 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5005 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5006 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5007 Addr.getPointer(), 5008 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5009 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5010 CGF.VoidPtrTy); 5011 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5012 // Use default allocator. 5013 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5014 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5015 5016 // _kmpc_free(gtid, addr, nullptr); 5017 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5018 CGM.getModule(), OMPRTL___kmpc_free), 5019 Args); 5020 } 5021 5022 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5023 OpenMPDependClauseKind NewDepKind, 5024 SourceLocation Loc) { 5025 ASTContext &C = CGM.getContext(); 5026 QualType FlagsTy; 5027 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5028 RecordDecl *KmpDependInfoRD = 5029 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5030 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5031 llvm::Value *NumDeps; 5032 LValue Base; 5033 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5034 5035 Address Begin = Base.getAddress(CGF); 5036 // Cast from pointer to array type to pointer to single element. 5037 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5038 // The basic structure here is a while-do loop. 
5039 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5040 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5041 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5042 CGF.EmitBlock(BodyBB); 5043 llvm::PHINode *ElementPHI = 5044 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5045 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5046 Begin = Address(ElementPHI, Begin.getAlignment()); 5047 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5048 Base.getTBAAInfo()); 5049 // deps[i].flags = NewDepKind; 5050 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5051 LValue FlagsLVal = CGF.EmitLValueForField( 5052 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5053 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5054 FlagsLVal); 5055 5056 // Shift the address forward by one element. 5057 Address ElementNext = 5058 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5059 ElementPHI->addIncoming(ElementNext.getPointer(), 5060 CGF.Builder.GetInsertBlock()); 5061 llvm::Value *IsEmpty = 5062 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5063 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5064 // Done. 
5065 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5066 } 5067 5068 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5069 const OMPExecutableDirective &D, 5070 llvm::Function *TaskFunction, 5071 QualType SharedsTy, Address Shareds, 5072 const Expr *IfCond, 5073 const OMPTaskDataTy &Data) { 5074 if (!CGF.HaveInsertPoint()) 5075 return; 5076 5077 TaskResultTy Result = 5078 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5079 llvm::Value *NewTask = Result.NewTask; 5080 llvm::Function *TaskEntry = Result.TaskEntry; 5081 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5082 LValue TDBase = Result.TDBase; 5083 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5084 // Process list of dependences. 5085 Address DependenciesArray = Address::invalid(); 5086 llvm::Value *NumOfElements; 5087 std::tie(NumOfElements, DependenciesArray) = 5088 emitDependClause(CGF, Data.Dependences, Loc); 5089 5090 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5091 // libcall. 
5092 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5093 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5094 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5095 // list is not empty 5096 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5097 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5098 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5099 llvm::Value *DepTaskArgs[7]; 5100 if (!Data.Dependences.empty()) { 5101 DepTaskArgs[0] = UpLoc; 5102 DepTaskArgs[1] = ThreadID; 5103 DepTaskArgs[2] = NewTask; 5104 DepTaskArgs[3] = NumOfElements; 5105 DepTaskArgs[4] = DependenciesArray.getPointer(); 5106 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5107 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5108 } 5109 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5110 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5111 if (!Data.Tied) { 5112 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5113 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5114 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5115 } 5116 if (!Data.Dependences.empty()) { 5117 CGF.EmitRuntimeCall( 5118 OMPBuilder.getOrCreateRuntimeFunction( 5119 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5120 DepTaskArgs); 5121 } else { 5122 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5123 CGM.getModule(), OMPRTL___kmpc_omp_task), 5124 TaskArgs); 5125 } 5126 // Check if parent region is untied and build return for untied task; 5127 if (auto *Region = 5128 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5129 Region->emitUntiedSwitch(CGF); 5130 }; 5131 5132 llvm::Value *DepWaitTaskArgs[6]; 5133 if (!Data.Dependences.empty()) { 5134 DepWaitTaskArgs[0] = UpLoc; 5135 DepWaitTaskArgs[1] = ThreadID; 5136 DepWaitTaskArgs[2] = NumOfElements; 5137 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5138 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5139 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5140 } 5141 auto &M = CGM.getModule(); 5142 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5143 TaskEntry, &Data, &DepWaitTaskArgs, 5144 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5145 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5146 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5147 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5148 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5149 // is specified. 5150 if (!Data.Dependences.empty()) 5151 CGF.EmitRuntimeCall( 5152 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5153 DepWaitTaskArgs); 5154 // Call proxy_task_entry(gtid, new_task); 5155 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5156 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5157 Action.Enter(CGF); 5158 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5159 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5160 OutlinedFnArgs); 5161 }; 5162 5163 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5164 // kmp_task_t *new_task); 5165 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5166 // kmp_task_t *new_task); 5167 RegionCodeGenTy RCG(CodeGen); 5168 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5169 M, OMPRTL___kmpc_omp_task_begin_if0), 5170 TaskArgs, 5171 OMPBuilder.getOrCreateRuntimeFunction( 5172 M, OMPRTL___kmpc_omp_task_complete_if0), 5173 TaskArgs); 5174 RCG.setAction(Action); 5175 RCG(CGF); 5176 }; 5177 5178 if (IfCond) { 5179 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5180 } else { 5181 RegionCodeGenTy ThenRCG(ThenCodeGen); 5182 ThenRCG(CGF); 5183 } 5184 } 5185 5186 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5187 const OMPLoopDirective &D, 5188 llvm::Function *TaskFunction, 5189 
QualType SharedsTy, Address Shareds, 5190 const Expr *IfCond, 5191 const OMPTaskDataTy &Data) { 5192 if (!CGF.HaveInsertPoint()) 5193 return; 5194 TaskResultTy Result = 5195 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5196 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5197 // libcall. 5198 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5199 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5200 // sched, kmp_uint64 grainsize, void *task_dup); 5201 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5202 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5203 llvm::Value *IfVal; 5204 if (IfCond) { 5205 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5206 /*isSigned=*/true); 5207 } else { 5208 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5209 } 5210 5211 LValue LBLVal = CGF.EmitLValueForField( 5212 Result.TDBase, 5213 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5214 const auto *LBVar = 5215 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5216 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5217 LBLVal.getQuals(), 5218 /*IsInitializer=*/true); 5219 LValue UBLVal = CGF.EmitLValueForField( 5220 Result.TDBase, 5221 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5222 const auto *UBVar = 5223 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5224 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5225 UBLVal.getQuals(), 5226 /*IsInitializer=*/true); 5227 LValue StLVal = CGF.EmitLValueForField( 5228 Result.TDBase, 5229 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5230 const auto *StVar = 5231 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5232 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5233 StLVal.getQuals(), 5234 /*IsInitializer=*/true); 5235 // 
Store reductions address. 5236 LValue RedLVal = CGF.EmitLValueForField( 5237 Result.TDBase, 5238 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5239 if (Data.Reductions) { 5240 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5241 } else { 5242 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5243 CGF.getContext().VoidPtrTy); 5244 } 5245 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5246 llvm::Value *TaskArgs[] = { 5247 UpLoc, 5248 ThreadID, 5249 Result.NewTask, 5250 IfVal, 5251 LBLVal.getPointer(CGF), 5252 UBLVal.getPointer(CGF), 5253 CGF.EmitLoadOfScalar(StLVal, Loc), 5254 llvm::ConstantInt::getSigned( 5255 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5256 llvm::ConstantInt::getSigned( 5257 CGF.IntTy, Data.Schedule.getPointer() 5258 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5259 : NoSchedule), 5260 Data.Schedule.getPointer() 5261 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5262 /*isSigned=*/false) 5263 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5264 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5265 Result.TaskDupFn, CGF.VoidPtrTy) 5266 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5267 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5268 CGM.getModule(), OMPRTL___kmpc_taskloop), 5269 TaskArgs); 5270 } 5271 5272 /// Emit reduction operation for each element of array (required for 5273 /// array sections) LHS op = RHS. 5274 /// \param Type Type of array. 5275 /// \param LHSVar Variable on the left side of the reduction operation 5276 /// (references element of array in original variable). 5277 /// \param RHSVar Variable on the right side of the reduction operation 5278 /// (references element of array in original variable). 5279 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5280 /// RHSVar. 
5281 static void EmitOMPAggregateReduction( 5282 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5283 const VarDecl *RHSVar, 5284 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5285 const Expr *, const Expr *)> &RedOpGen, 5286 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5287 const Expr *UpExpr = nullptr) { 5288 // Perform element-by-element initialization. 5289 QualType ElementTy; 5290 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5291 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5292 5293 // Drill down to the base element type on both arrays. 5294 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5295 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5296 5297 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5298 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5299 // Cast from pointer to array type to pointer to single element. 5300 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5301 // The basic structure here is a while-do loop. 5302 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5303 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5304 llvm::Value *IsEmpty = 5305 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5306 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5307 5308 // Enter the loop body, making that address the current address. 
5309 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5310 CGF.EmitBlock(BodyBB); 5311 5312 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5313 5314 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5315 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5316 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5317 Address RHSElementCurrent = 5318 Address(RHSElementPHI, 5319 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5320 5321 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5322 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5323 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5324 Address LHSElementCurrent = 5325 Address(LHSElementPHI, 5326 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5327 5328 // Emit copy. 5329 CodeGenFunction::OMPPrivateScope Scope(CGF); 5330 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5331 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5332 Scope.Privatize(); 5333 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5334 Scope.ForceCleanup(); 5335 5336 // Shift the address forward by one element. 5337 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5338 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5339 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5340 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5341 // Check whether we've reached the end. 5342 llvm::Value *Done = 5343 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5344 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5345 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5346 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5347 5348 // Done. 5349 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5350 } 5351 5352 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5353 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5354 /// UDR combiner function. 5355 static void emitReductionCombiner(CodeGenFunction &CGF, 5356 const Expr *ReductionOp) { 5357 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5358 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5359 if (const auto *DRE = 5360 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5361 if (const auto *DRD = 5362 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5363 std::pair<llvm::Function *, llvm::Function *> Reduction = 5364 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5365 RValue Func = RValue::get(Reduction.first); 5366 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5367 CGF.EmitIgnoredExpr(ReductionOp); 5368 return; 5369 } 5370 CGF.EmitIgnoredExpr(ReductionOp); 5371 } 5372 5373 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5374 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5375 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5376 ArrayRef<const Expr *> ReductionOps) { 5377 ASTContext &C = CGM.getContext(); 5378 5379 // void reduction_func(void *LHSArg, void *RHSArg); 5380 FunctionArgList Args; 5381 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5382 ImplicitParamDecl::Other); 5383 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5384 ImplicitParamDecl::Other); 5385 Args.push_back(&LHSArg); 5386 Args.push_back(&RHSArg); 5387 const auto &CGFI = 5388 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5389 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5390 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5391 llvm::GlobalValue::InternalLinkage, Name, 5392 &CGM.getModule()); 5393 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5394 Fn->setDoesNotRecurse(); 
5395 CodeGenFunction CGF(CGM); 5396 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5397 5398 // Dst = (void*[n])(LHSArg); 5399 // Src = (void*[n])(RHSArg); 5400 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5401 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5402 ArgsType), CGF.getPointerAlign()); 5403 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5404 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5405 ArgsType), CGF.getPointerAlign()); 5406 5407 // ... 5408 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5409 // ... 5410 CodeGenFunction::OMPPrivateScope Scope(CGF); 5411 auto IPriv = Privates.begin(); 5412 unsigned Idx = 0; 5413 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5414 const auto *RHSVar = 5415 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5416 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5417 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5418 }); 5419 const auto *LHSVar = 5420 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5421 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5422 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5423 }); 5424 QualType PrivTy = (*IPriv)->getType(); 5425 if (PrivTy->isVariablyModifiedType()) { 5426 // Get array size and emit VLA type. 
5427 ++Idx; 5428 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5429 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5430 const VariableArrayType *VLA = 5431 CGF.getContext().getAsVariableArrayType(PrivTy); 5432 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5433 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5434 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5435 CGF.EmitVariablyModifiedType(PrivTy); 5436 } 5437 } 5438 Scope.Privatize(); 5439 IPriv = Privates.begin(); 5440 auto ILHS = LHSExprs.begin(); 5441 auto IRHS = RHSExprs.begin(); 5442 for (const Expr *E : ReductionOps) { 5443 if ((*IPriv)->getType()->isArrayType()) { 5444 // Emit reduction for array section. 5445 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5446 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5447 EmitOMPAggregateReduction( 5448 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5449 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5450 emitReductionCombiner(CGF, E); 5451 }); 5452 } else { 5453 // Emit reduction for array subscript or single variable. 5454 emitReductionCombiner(CGF, E); 5455 } 5456 ++IPriv; 5457 ++ILHS; 5458 ++IRHS; 5459 } 5460 Scope.ForceCleanup(); 5461 CGF.FinishFunction(); 5462 return Fn; 5463 } 5464 5465 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5466 const Expr *ReductionOp, 5467 const Expr *PrivateRef, 5468 const DeclRefExpr *LHS, 5469 const DeclRefExpr *RHS) { 5470 if (PrivateRef->getType()->isArrayType()) { 5471 // Emit reduction for array section. 
5472 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5473 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5474 EmitOMPAggregateReduction( 5475 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5476 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5477 emitReductionCombiner(CGF, ReductionOp); 5478 }); 5479 } else { 5480 // Emit reduction for array subscript or single variable. 5481 emitReductionCombiner(CGF, ReductionOp); 5482 } 5483 } 5484 5485 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5486 ArrayRef<const Expr *> Privates, 5487 ArrayRef<const Expr *> LHSExprs, 5488 ArrayRef<const Expr *> RHSExprs, 5489 ArrayRef<const Expr *> ReductionOps, 5490 ReductionOptionsTy Options) { 5491 if (!CGF.HaveInsertPoint()) 5492 return; 5493 5494 bool WithNowait = Options.WithNowait; 5495 bool SimpleReduction = Options.SimpleReduction; 5496 5497 // Next code should be emitted for reduction: 5498 // 5499 // static kmp_critical_name lock = { 0 }; 5500 // 5501 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5502 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5503 // ... 5504 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5505 // *(Type<n>-1*)rhs[<n>-1]); 5506 // } 5507 // 5508 // ... 5509 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5510 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5511 // RedList, reduce_func, &<lock>)) { 5512 // case 1: 5513 // ... 5514 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5515 // ... 5516 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5517 // break; 5518 // case 2: 5519 // ... 5520 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5521 // ... 5522 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5523 // break; 5524 // default:; 5525 // } 5526 // 5527 // if SimpleReduction is true, only the next code is generated: 5528 // ... 
5529 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5530 // ... 5531 5532 ASTContext &C = CGM.getContext(); 5533 5534 if (SimpleReduction) { 5535 CodeGenFunction::RunCleanupsScope Scope(CGF); 5536 auto IPriv = Privates.begin(); 5537 auto ILHS = LHSExprs.begin(); 5538 auto IRHS = RHSExprs.begin(); 5539 for (const Expr *E : ReductionOps) { 5540 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5541 cast<DeclRefExpr>(*IRHS)); 5542 ++IPriv; 5543 ++ILHS; 5544 ++IRHS; 5545 } 5546 return; 5547 } 5548 5549 // 1. Build a list of reduction variables. 5550 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5551 auto Size = RHSExprs.size(); 5552 for (const Expr *E : Privates) { 5553 if (E->getType()->isVariablyModifiedType()) 5554 // Reserve place for array size. 5555 ++Size; 5556 } 5557 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5558 QualType ReductionArrayTy = 5559 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5560 /*IndexTypeQuals=*/0); 5561 Address ReductionList = 5562 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5563 auto IPriv = Privates.begin(); 5564 unsigned Idx = 0; 5565 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5566 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5567 CGF.Builder.CreateStore( 5568 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5569 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5570 Elem); 5571 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5572 // Store array size. 5573 ++Idx; 5574 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5575 llvm::Value *Size = CGF.Builder.CreateIntCast( 5576 CGF.getVLASize( 5577 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5578 .NumElts, 5579 CGF.SizeTy, /*isSigned=*/false); 5580 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5581 Elem); 5582 } 5583 } 5584 5585 // 2. 
Emit reduce_func(). 5586 llvm::Function *ReductionFn = emitReductionFunction( 5587 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5588 LHSExprs, RHSExprs, ReductionOps); 5589 5590 // 3. Create static kmp_critical_name lock = { 0 }; 5591 std::string Name = getName({"reduction"}); 5592 llvm::Value *Lock = getCriticalRegionLock(Name); 5593 5594 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5595 // RedList, reduce_func, &<lock>); 5596 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5597 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5598 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5599 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5600 ReductionList.getPointer(), CGF.VoidPtrTy); 5601 llvm::Value *Args[] = { 5602 IdentTLoc, // ident_t *<loc> 5603 ThreadId, // i32 <gtid> 5604 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5605 ReductionArrayTySize, // size_type sizeof(RedList) 5606 RL, // void *RedList 5607 ReductionFn, // void (*) (void *, void *) <reduce_func> 5608 Lock // kmp_critical_name *&<lock> 5609 }; 5610 llvm::Value *Res = CGF.EmitRuntimeCall( 5611 OMPBuilder.getOrCreateRuntimeFunction( 5612 CGM.getModule(), 5613 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5614 Args); 5615 5616 // 5. Build switch(res) 5617 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5618 llvm::SwitchInst *SwInst = 5619 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5620 5621 // 6. Build case 1: 5622 // ... 5623 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5624 // ... 
5625 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5626 // break; 5627 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5628 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5629 CGF.EmitBlock(Case1BB); 5630 5631 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5632 llvm::Value *EndArgs[] = { 5633 IdentTLoc, // ident_t *<loc> 5634 ThreadId, // i32 <gtid> 5635 Lock // kmp_critical_name *&<lock> 5636 }; 5637 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5638 CodeGenFunction &CGF, PrePostActionTy &Action) { 5639 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5640 auto IPriv = Privates.begin(); 5641 auto ILHS = LHSExprs.begin(); 5642 auto IRHS = RHSExprs.begin(); 5643 for (const Expr *E : ReductionOps) { 5644 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5645 cast<DeclRefExpr>(*IRHS)); 5646 ++IPriv; 5647 ++ILHS; 5648 ++IRHS; 5649 } 5650 }; 5651 RegionCodeGenTy RCG(CodeGen); 5652 CommonActionTy Action( 5653 nullptr, llvm::None, 5654 OMPBuilder.getOrCreateRuntimeFunction( 5655 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5656 : OMPRTL___kmpc_end_reduce), 5657 EndArgs); 5658 RCG.setAction(Action); 5659 RCG(CGF); 5660 5661 CGF.EmitBranch(DefaultBB); 5662 5663 // 7. Build case 2: 5664 // ... 5665 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5666 // ... 
5667 // break; 5668 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5669 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5670 CGF.EmitBlock(Case2BB); 5671 5672 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5673 CodeGenFunction &CGF, PrePostActionTy &Action) { 5674 auto ILHS = LHSExprs.begin(); 5675 auto IRHS = RHSExprs.begin(); 5676 auto IPriv = Privates.begin(); 5677 for (const Expr *E : ReductionOps) { 5678 const Expr *XExpr = nullptr; 5679 const Expr *EExpr = nullptr; 5680 const Expr *UpExpr = nullptr; 5681 BinaryOperatorKind BO = BO_Comma; 5682 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5683 if (BO->getOpcode() == BO_Assign) { 5684 XExpr = BO->getLHS(); 5685 UpExpr = BO->getRHS(); 5686 } 5687 } 5688 // Try to emit update expression as a simple atomic. 5689 const Expr *RHSExpr = UpExpr; 5690 if (RHSExpr) { 5691 // Analyze RHS part of the whole expression. 5692 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5693 RHSExpr->IgnoreParenImpCasts())) { 5694 // If this is a conditional operator, analyze its condition for 5695 // min/max reduction operator. 
5696 RHSExpr = ACO->getCond(); 5697 } 5698 if (const auto *BORHS = 5699 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5700 EExpr = BORHS->getRHS(); 5701 BO = BORHS->getOpcode(); 5702 } 5703 } 5704 if (XExpr) { 5705 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5706 auto &&AtomicRedGen = [BO, VD, 5707 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5708 const Expr *EExpr, const Expr *UpExpr) { 5709 LValue X = CGF.EmitLValue(XExpr); 5710 RValue E; 5711 if (EExpr) 5712 E = CGF.EmitAnyExpr(EExpr); 5713 CGF.EmitOMPAtomicSimpleUpdateExpr( 5714 X, E, BO, /*IsXLHSInRHSPart=*/true, 5715 llvm::AtomicOrdering::Monotonic, Loc, 5716 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5717 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5718 PrivateScope.addPrivate( 5719 VD, [&CGF, VD, XRValue, Loc]() { 5720 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5721 CGF.emitOMPSimpleStore( 5722 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5723 VD->getType().getNonReferenceType(), Loc); 5724 return LHSTemp; 5725 }); 5726 (void)PrivateScope.Privatize(); 5727 return CGF.EmitAnyExpr(UpExpr); 5728 }); 5729 }; 5730 if ((*IPriv)->getType()->isArrayType()) { 5731 // Emit atomic reduction for array section. 5732 const auto *RHSVar = 5733 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5734 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5735 AtomicRedGen, XExpr, EExpr, UpExpr); 5736 } else { 5737 // Emit atomic reduction for array subscript or single variable. 5738 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5739 } 5740 } else { 5741 // Emit as a critical region. 
5742 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5743 const Expr *, const Expr *) { 5744 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5745 std::string Name = RT.getName({"atomic_reduction"}); 5746 RT.emitCriticalRegion( 5747 CGF, Name, 5748 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5749 Action.Enter(CGF); 5750 emitReductionCombiner(CGF, E); 5751 }, 5752 Loc); 5753 }; 5754 if ((*IPriv)->getType()->isArrayType()) { 5755 const auto *LHSVar = 5756 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5757 const auto *RHSVar = 5758 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5759 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5760 CritRedGen); 5761 } else { 5762 CritRedGen(CGF, nullptr, nullptr, nullptr); 5763 } 5764 } 5765 ++ILHS; 5766 ++IRHS; 5767 ++IPriv; 5768 } 5769 }; 5770 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5771 if (!WithNowait) { 5772 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5773 llvm::Value *EndArgs[] = { 5774 IdentTLoc, // ident_t *<loc> 5775 ThreadId, // i32 <gtid> 5776 Lock // kmp_critical_name *&<lock> 5777 }; 5778 CommonActionTy Action(nullptr, llvm::None, 5779 OMPBuilder.getOrCreateRuntimeFunction( 5780 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5781 EndArgs); 5782 AtomicRCG.setAction(Action); 5783 AtomicRCG(CGF); 5784 } else { 5785 AtomicRCG(CGF); 5786 } 5787 5788 CGF.EmitBranch(DefaultBB); 5789 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5790 } 5791 5792 /// Generates unique name for artificial threadprivate variables. 5793 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// The declaration is taken from getBaseDecl(\p Ref) or, when that returns
/// null, from \p Ref itself (which must then be a DeclRefExpr to a VarDecl).
/// Local variables and parameters contribute their plain name, everything
/// else its mangled name; the raw encoding of the canonical declaration's
/// begin location is appended after an underscore.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item inside \p RCG.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is emitted with a fresh CodeGenFunction, independent of the
  // caller's insertion point.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer to the private copy from the first parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer). Otherwise a null pointer lvalue is passed instead.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///
///   ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item inside \p RCG.
/// \param LHS, RHS DeclRefExprs whose variables are remapped to the first and
/// second function argument respectively while the combiner is emitted.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item inside \p RCG.
/// \return The newly created function, or nullptr when the item needs no
/// cleanups (RCG.needCleanups(N) is false).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer to the private copy from the only parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Builds a temporary array of kmp_taskred_input_t descriptors, one per entry
/// of Data.ReductionVars, and hands it to the runtime:
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set,
/// __kmpc_taskred_init otherwise.
///
/// \return The result of the runtime call, or nullptr when there is no insert
/// point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is a 32-bit unsigned integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the call to __kmpc_task_reduction_modifier_fini; the call result is
/// discarded.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the runtime size of reduction item \p N into the artificial
/// threadprivate variable named by generateUniqueName(CGM, "reduction_size",
/// ...), the same variable the generated init/comb/fini functions load the
/// size from. Emits nothing when the item has no non-constant size.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Calls __kmpc_task_reduction_get_th_data for the item designated by
/// \p SharedLVal and wraps the returned pointer in an Address that reuses the
/// alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits a taskwait: through OMPBuilder.createTaskwait when the
/// OpenMPIRBuilder language option is set, otherwise as a direct call to
/// __kmpc_omp_taskwait. Afterwards emitUntiedSwitch is invoked on the current
/// region info, if it is a CGOpenMPRegionInfo.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen inline, with an
/// InlinedOpenMPRegionRAII object alive for the duration of the emission.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to the matching RTCancelKind.
/// Anything other than parallel, for or sections is asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a call to __kmpc_cancellationpoint followed by a conditional branch
/// out of the enclosing construct. Nothing is emitted unless the current
/// region info is a CGOpenMPRegionInfo and either \p CancelRegion is
/// taskgroup or that region has a cancel.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested right below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel followed by a conditional branch out of the
/// enclosing construct. When \p IfCond is given, the call is emitted through
/// emitIfClause with an empty else branch. Nothing is emitted unless the
/// current region info is a CGOpenMPRegionInfo.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested right below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator, allocator traits) expression pairs. This is a non-owning
  // view; the referenced storage must outlive the action.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // Emits emitUsesAllocatorsInit for every recorded pair.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // Emits emitUsesAllocatorsFini for every recorded allocator.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Collects the (allocator, traits) pairs of all uses_allocators clauses on
/// \p D, skipping entries without traits, installs an
/// OMPUsesAllocatorsActionTy for them on \p CodeGen and then delegates to
/// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // Note: this local 'D' shadows the directive parameter 'D' inside the
      // loop body.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  // The action only views 'Allocators', which stays alive until the helper
  // call below has returned.
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emits a call to __kmpc_init_allocator(gtid, memspace, ntraits, traits) and
/// stores the converted result into the variable referenced by \p Allocator,
/// which is emitted as a variable declaration here.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array size of the traits expression
  // type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Re-type the traits lvalue as a 'void *' lvalue and load a scalar from it.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the 'void *' call result to the allocator expression's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Loads the value of \p Allocator, converts it to 'void *' and emits a call
/// to __kmpc_destroy_allocator(gtid, allocator); the call result is discarded.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Outlines the 'target' captured statement of \p D into \p OutlinedFn and,
/// when \p IsOffloadEntry is set, creates \p OutlinedFnID and registers the
/// region with OffloadEntriesInfoManager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a separate constant i8 global initialized to zero.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls. In either case the expression must also have no side effects
/// (possible effects included) to be considered trivial.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
6520 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6521 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6522 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6523 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6524 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6525 isa<UsingDirectiveDecl>(D) || 6526 isa<OMPDeclareReductionDecl>(D) || 6527 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6528 return true; 6529 const auto *VD = dyn_cast<VarDecl>(D); 6530 if (!VD) 6531 return false; 6532 return VD->isConstexpr() || 6533 ((VD->getType().isTrivialType(Ctx) || 6534 VD->getType()->isReferenceType()) && 6535 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6536 })) 6537 continue; 6538 } 6539 // Found multiple children - cannot get the one child only. 6540 if (Child) 6541 return nullptr; 6542 Child = S; 6543 } 6544 if (Child) 6545 Child = Child->IgnoreContainers(); 6546 } 6547 return Child; 6548 } 6549 6550 /// Emit the number of teams for a target directive. Inspect the num_teams 6551 /// clause associated with a teams construct combined or closely nested 6552 /// with the target directive. 6553 /// 6554 /// Emit a team of size one for directives such as 'target parallel' that 6555 /// have no associated teams construct. 6556 /// 6557 /// Otherwise, return nullptr. 
6558 static llvm::Value * 6559 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6560 const OMPExecutableDirective &D) { 6561 assert(!CGF.getLangOpts().OpenMPIsDevice && 6562 "Clauses associated with the teams directive expected to be emitted " 6563 "only for the host!"); 6564 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6565 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6566 "Expected target-based executable directive."); 6567 CGBuilderTy &Bld = CGF.Builder; 6568 switch (DirectiveKind) { 6569 case OMPD_target: { 6570 const auto *CS = D.getInnermostCapturedStmt(); 6571 const auto *Body = 6572 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6573 const Stmt *ChildStmt = 6574 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6575 if (const auto *NestedDir = 6576 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6577 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6578 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6579 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6580 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6581 const Expr *NumTeams = 6582 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6583 llvm::Value *NumTeamsVal = 6584 CGF.EmitScalarExpr(NumTeams, 6585 /*IgnoreResultAssign*/ true); 6586 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6587 /*isSigned=*/true); 6588 } 6589 return Bld.getInt32(0); 6590 } 6591 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6592 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6593 return Bld.getInt32(1); 6594 return Bld.getInt32(0); 6595 } 6596 return nullptr; 6597 } 6598 case OMPD_target_teams: 6599 case OMPD_target_teams_distribute: 6600 case OMPD_target_teams_distribute_simd: 6601 case OMPD_target_teams_distribute_parallel_for: 6602 case OMPD_target_teams_distribute_parallel_for_simd: { 6603 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6604 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6605 const Expr *NumTeams = 6606 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6607 llvm::Value *NumTeamsVal = 6608 CGF.EmitScalarExpr(NumTeams, 6609 /*IgnoreResultAssign*/ true); 6610 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6611 /*isSigned=*/true); 6612 } 6613 return Bld.getInt32(0); 6614 } 6615 case OMPD_target_parallel: 6616 case OMPD_target_parallel_for: 6617 case OMPD_target_parallel_for_simd: 6618 case OMPD_target_simd: 6619 return Bld.getInt32(1); 6620 case OMPD_parallel: 6621 case OMPD_for: 6622 case OMPD_parallel_for: 6623 case OMPD_parallel_master: 6624 case OMPD_parallel_sections: 6625 case OMPD_for_simd: 6626 case OMPD_parallel_for_simd: 6627 case OMPD_cancel: 6628 case OMPD_cancellation_point: 6629 case OMPD_ordered: 6630 case OMPD_threadprivate: 6631 case OMPD_allocate: 6632 case OMPD_task: 6633 case OMPD_simd: 6634 case OMPD_tile: 6635 case OMPD_sections: 6636 case OMPD_section: 6637 case OMPD_single: 6638 case OMPD_master: 6639 case OMPD_critical: 6640 case OMPD_taskyield: 6641 case OMPD_barrier: 6642 case OMPD_taskwait: 6643 case OMPD_taskgroup: 6644 case OMPD_atomic: 6645 case OMPD_flush: 6646 case OMPD_depobj: 6647 case OMPD_scan: 6648 case OMPD_teams: 6649 case OMPD_target_data: 6650 case OMPD_target_exit_data: 6651 case OMPD_target_enter_data: 6652 case OMPD_distribute: 6653 case OMPD_distribute_simd: 6654 case OMPD_distribute_parallel_for: 6655 case OMPD_distribute_parallel_for_simd: 6656 case OMPD_teams_distribute: 6657 case OMPD_teams_distribute_simd: 6658 case OMPD_teams_distribute_parallel_for: 6659 case OMPD_teams_distribute_parallel_for_simd: 6660 case OMPD_target_update: 6661 case OMPD_declare_simd: 6662 case OMPD_declare_variant: 6663 case OMPD_begin_declare_variant: 6664 case OMPD_end_declare_variant: 6665 case OMPD_declare_target: 6666 case OMPD_end_declare_target: 6667 case OMPD_declare_reduction: 6668 case OMPD_declare_mapper: 6669 case OMPD_taskloop: 6670 case OMPD_taskloop_simd: 6671 case 
OMPD_master_taskloop: 6672 case OMPD_master_taskloop_simd: 6673 case OMPD_parallel_master_taskloop: 6674 case OMPD_parallel_master_taskloop_simd: 6675 case OMPD_requires: 6676 case OMPD_unknown: 6677 break; 6678 default: 6679 break; 6680 } 6681 llvm_unreachable("Unexpected directive kind."); 6682 } 6683 6684 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6685 llvm::Value *DefaultThreadLimitVal) { 6686 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6687 CGF.getContext(), CS->getCapturedStmt()); 6688 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6689 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6690 llvm::Value *NumThreads = nullptr; 6691 llvm::Value *CondVal = nullptr; 6692 // Handle if clause. If if clause present, the number of threads is 6693 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6694 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6695 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6696 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6697 const OMPIfClause *IfClause = nullptr; 6698 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6699 if (C->getNameModifier() == OMPD_unknown || 6700 C->getNameModifier() == OMPD_parallel) { 6701 IfClause = C; 6702 break; 6703 } 6704 } 6705 if (IfClause) { 6706 const Expr *Cond = IfClause->getCondition(); 6707 bool Result; 6708 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6709 if (!Result) 6710 return CGF.Builder.getInt32(1); 6711 } else { 6712 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6713 if (const auto *PreInit = 6714 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6715 for (const auto *I : PreInit->decls()) { 6716 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6717 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6718 } else { 6719 CodeGenFunction::AutoVarEmission Emission = 6720 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6721 CGF.EmitAutoVarCleanups(Emission); 6722 
} 6723 } 6724 } 6725 CondVal = CGF.EvaluateExprAsBool(Cond); 6726 } 6727 } 6728 } 6729 // Check the value of num_threads clause iff if clause was not specified 6730 // or is not evaluated to false. 6731 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6732 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6733 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6734 const auto *NumThreadsClause = 6735 Dir->getSingleClause<OMPNumThreadsClause>(); 6736 CodeGenFunction::LexicalScope Scope( 6737 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6738 if (const auto *PreInit = 6739 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6740 for (const auto *I : PreInit->decls()) { 6741 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6742 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6743 } else { 6744 CodeGenFunction::AutoVarEmission Emission = 6745 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6746 CGF.EmitAutoVarCleanups(Emission); 6747 } 6748 } 6749 } 6750 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6751 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6752 /*isSigned=*/false); 6753 if (DefaultThreadLimitVal) 6754 NumThreads = CGF.Builder.CreateSelect( 6755 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6756 DefaultThreadLimitVal, NumThreads); 6757 } else { 6758 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6759 : CGF.Builder.getInt32(0); 6760 } 6761 // Process condition of the if clause. 6762 if (CondVal) { 6763 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6764 CGF.Builder.getInt32(1)); 6765 } 6766 return NumThreads; 6767 } 6768 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6769 return CGF.Builder.getInt32(1); 6770 return DefaultThreadLimitVal; 6771 } 6772 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6773 : CGF.Builder.getInt32(0); 6774 } 6775 6776 /// Emit the number of threads for a target directive. 
Inspect the 6777 /// thread_limit clause associated with a teams construct combined or closely 6778 /// nested with the target directive. 6779 /// 6780 /// Emit the num_threads clause for directives such as 'target parallel' that 6781 /// have no associated teams construct. 6782 /// 6783 /// Otherwise, return nullptr. 6784 static llvm::Value * 6785 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6786 const OMPExecutableDirective &D) { 6787 assert(!CGF.getLangOpts().OpenMPIsDevice && 6788 "Clauses associated with the teams directive expected to be emitted " 6789 "only for the host!"); 6790 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6791 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6792 "Expected target-based executable directive."); 6793 CGBuilderTy &Bld = CGF.Builder; 6794 llvm::Value *ThreadLimitVal = nullptr; 6795 llvm::Value *NumThreadsVal = nullptr; 6796 switch (DirectiveKind) { 6797 case OMPD_target: { 6798 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6799 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6800 return NumThreads; 6801 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6802 CGF.getContext(), CS->getCapturedStmt()); 6803 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6804 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6805 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6806 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6807 const auto *ThreadLimitClause = 6808 Dir->getSingleClause<OMPThreadLimitClause>(); 6809 CodeGenFunction::LexicalScope Scope( 6810 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6811 if (const auto *PreInit = 6812 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6813 for (const auto *I : PreInit->decls()) { 6814 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6815 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6816 } else { 6817 CodeGenFunction::AutoVarEmission Emission = 6818 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6819 CGF.EmitAutoVarCleanups(Emission); 6820 } 6821 } 6822 } 6823 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6824 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6825 ThreadLimitVal = 6826 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6827 } 6828 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6829 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6830 CS = Dir->getInnermostCapturedStmt(); 6831 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6832 CGF.getContext(), CS->getCapturedStmt()); 6833 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6834 } 6835 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6836 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6837 CS = Dir->getInnermostCapturedStmt(); 6838 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6839 return NumThreads; 6840 } 6841 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6842 return Bld.getInt32(1); 6843 } 6844 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6845 } 6846 case OMPD_target_teams: { 6847 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6848 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6849 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6850 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6851 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6852 ThreadLimitVal = 6853 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6854 } 6855 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6856 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6857 return NumThreads; 6858 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6859 CGF.getContext(), CS->getCapturedStmt()); 6860 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6861 if (Dir->getDirectiveKind() == OMPD_distribute) { 6862 CS = Dir->getInnermostCapturedStmt(); 6863 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6864 return NumThreads; 6865 } 6866 } 6867 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6868 } 6869 case OMPD_target_teams_distribute: 6870 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6871 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6872 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6873 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6874 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6875 ThreadLimitVal = 6876 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6877 } 6878 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6879 case OMPD_target_parallel: 6880 case OMPD_target_parallel_for: 6881 case OMPD_target_parallel_for_simd: 6882 case OMPD_target_teams_distribute_parallel_for: 6883 case OMPD_target_teams_distribute_parallel_for_simd: { 6884 llvm::Value *CondVal = nullptr; 6885 // Handle if clause. 
If if clause present, the number of threads is 6886 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6887 if (D.hasClausesOfKind<OMPIfClause>()) { 6888 const OMPIfClause *IfClause = nullptr; 6889 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6890 if (C->getNameModifier() == OMPD_unknown || 6891 C->getNameModifier() == OMPD_parallel) { 6892 IfClause = C; 6893 break; 6894 } 6895 } 6896 if (IfClause) { 6897 const Expr *Cond = IfClause->getCondition(); 6898 bool Result; 6899 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6900 if (!Result) 6901 return Bld.getInt32(1); 6902 } else { 6903 CodeGenFunction::RunCleanupsScope Scope(CGF); 6904 CondVal = CGF.EvaluateExprAsBool(Cond); 6905 } 6906 } 6907 } 6908 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6909 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6910 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6911 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6912 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6913 ThreadLimitVal = 6914 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6915 } 6916 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6917 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6918 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6919 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6920 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6921 NumThreadsVal = 6922 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6923 ThreadLimitVal = ThreadLimitVal 6924 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6925 ThreadLimitVal), 6926 NumThreadsVal, ThreadLimitVal) 6927 : NumThreadsVal; 6928 } 6929 if (!ThreadLimitVal) 6930 ThreadLimitVal = Bld.getInt32(0); 6931 if (CondVal) 6932 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6933 return ThreadLimitVal; 6934 } 6935 case OMPD_target_teams_distribute_simd: 6936 case OMPD_target_simd: 6937 return Bld.getInt32(1); 6938 case OMPD_parallel: 6939 case OMPD_for: 6940 case OMPD_parallel_for: 6941 case OMPD_parallel_master: 6942 case OMPD_parallel_sections: 6943 case OMPD_for_simd: 6944 case OMPD_parallel_for_simd: 6945 case OMPD_cancel: 6946 case OMPD_cancellation_point: 6947 case OMPD_ordered: 6948 case OMPD_threadprivate: 6949 case OMPD_allocate: 6950 case OMPD_task: 6951 case OMPD_simd: 6952 case OMPD_tile: 6953 case OMPD_sections: 6954 case OMPD_section: 6955 case OMPD_single: 6956 case OMPD_master: 6957 case OMPD_critical: 6958 case OMPD_taskyield: 6959 case OMPD_barrier: 6960 case OMPD_taskwait: 6961 case OMPD_taskgroup: 6962 case OMPD_atomic: 6963 case OMPD_flush: 6964 case OMPD_depobj: 6965 case OMPD_scan: 6966 case OMPD_teams: 6967 case OMPD_target_data: 6968 case OMPD_target_exit_data: 6969 case OMPD_target_enter_data: 6970 case OMPD_distribute: 6971 case OMPD_distribute_simd: 6972 case OMPD_distribute_parallel_for: 6973 case OMPD_distribute_parallel_for_simd: 6974 case OMPD_teams_distribute: 6975 case OMPD_teams_distribute_simd: 6976 case OMPD_teams_distribute_parallel_for: 6977 case OMPD_teams_distribute_parallel_for_simd: 6978 case OMPD_target_update: 6979 case OMPD_declare_simd: 6980 case OMPD_declare_variant: 6981 case OMPD_begin_declare_variant: 6982 case OMPD_end_declare_variant: 6983 case OMPD_declare_target: 6984 case OMPD_end_declare_target: 6985 case OMPD_declare_reduction: 6986 case OMPD_declare_mapper: 6987 case OMPD_taskloop: 6988 case OMPD_taskloop_simd: 6989 case OMPD_master_taskloop: 6990 case OMPD_master_taskloop_simd: 6991 case 
OMPD_parallel_master_taskloop: 6992 case OMPD_parallel_master_taskloop_simd: 6993 case OMPD_requires: 6994 case OMPD_unknown: 6995 break; 6996 default: 6997 break; 6998 } 6999 llvm_unreachable("Unsupported directive kind."); 7000 } 7001 7002 namespace { 7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7004 7005 // Utility to handle information from clauses associated with a given 7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7007 // It provides a convenient interface to obtain the information and generate 7008 // code for that information. 7009 class MappableExprsHandler { 7010 public: 7011 /// Values for bit flags used to specify the mapping type for 7012 /// offloading. 7013 enum OpenMPOffloadMappingFlags : uint64_t { 7014 /// No flags 7015 OMP_MAP_NONE = 0x0, 7016 /// Allocate memory on the device and move data from host to device. 7017 OMP_MAP_TO = 0x01, 7018 /// Allocate memory on the device and move data from device to host. 7019 OMP_MAP_FROM = 0x02, 7020 /// Always perform the requested mapping action on the element, even 7021 /// if it was already mapped before. 7022 OMP_MAP_ALWAYS = 0x04, 7023 /// Delete the element from the device environment, ignoring the 7024 /// current reference count associated with the element. 7025 OMP_MAP_DELETE = 0x08, 7026 /// The element being mapped is a pointer-pointee pair; both the 7027 /// pointer and the pointee should be mapped. 7028 OMP_MAP_PTR_AND_OBJ = 0x10, 7029 /// This flags signals that the base address of an entry should be 7030 /// passed to the target kernel as an argument. 7031 OMP_MAP_TARGET_PARAM = 0x20, 7032 /// Signal that the runtime library has to return the device pointer 7033 /// in the current position for the data being mapped. Used when we have the 7034 /// use_device_ptr or use_device_addr clause. 7035 OMP_MAP_RETURN_PARAM = 0x40, 7036 /// This flag signals that the reference being passed is a pointer to 7037 /// private data. 
7038 OMP_MAP_PRIVATE = 0x80, 7039 /// Pass the element to the device by value. 7040 OMP_MAP_LITERAL = 0x100, 7041 /// Implicit map 7042 OMP_MAP_IMPLICIT = 0x200, 7043 /// Close is a hint to the runtime to allocate memory close to 7044 /// the target device. 7045 OMP_MAP_CLOSE = 0x400, 7046 /// 0x800 is reserved for compatibility with XLC. 7047 /// Produce a runtime error if the data is not already allocated. 7048 OMP_MAP_PRESENT = 0x1000, 7049 /// Signal that the runtime library should use args as an array of 7050 /// descriptor_dim pointers and use args_size as dims. Used when we have 7051 /// non-contiguous list items in target update directive 7052 OMP_MAP_NON_CONTIG = 0x100000000000, 7053 /// The 16 MSBs of the flags indicate whether the entry is member of some 7054 /// struct/class. 7055 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7056 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7057 }; 7058 7059 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7060 static unsigned getFlagMemberOffset() { 7061 unsigned Offset = 0; 7062 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7063 Remain = Remain >> 1) 7064 Offset++; 7065 return Offset; 7066 } 7067 7068 /// Class that holds debugging information for a data mapping to be passed to 7069 /// the runtime library. 7070 class MappingExprInfo { 7071 /// The variable declaration used for the data mapping. 7072 const ValueDecl *MapDecl = nullptr; 7073 /// The original expression used in the map clause, or null if there is 7074 /// none. 7075 const Expr *MapExpr = nullptr; 7076 7077 public: 7078 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7079 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7080 7081 const ValueDecl *getMapDecl() const { return MapDecl; } 7082 const Expr *getMapExpr() const { return MapExpr; } 7083 }; 7084 7085 /// Class that associates information with a base pointer to be passed to the 7086 /// runtime library. 
7087 class BasePointerInfo { 7088 /// The base pointer. 7089 llvm::Value *Ptr = nullptr; 7090 /// The base declaration that refers to this device pointer, or null if 7091 /// there is none. 7092 const ValueDecl *DevPtrDecl = nullptr; 7093 7094 public: 7095 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7096 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7097 llvm::Value *operator*() const { return Ptr; } 7098 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7099 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7100 }; 7101 7102 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7103 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7104 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7105 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7106 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7107 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7108 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7109 7110 /// This structure contains combined information generated for mappable 7111 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7112 /// mappers, and non-contiguous information. 7113 struct MapCombinedInfoTy { 7114 struct StructNonContiguousInfo { 7115 bool IsNonContiguous = false; 7116 MapDimArrayTy Dims; 7117 MapNonContiguousArrayTy Offsets; 7118 MapNonContiguousArrayTy Counts; 7119 MapNonContiguousArrayTy Strides; 7120 }; 7121 MapExprsArrayTy Exprs; 7122 MapBaseValuesArrayTy BasePointers; 7123 MapValuesArrayTy Pointers; 7124 MapValuesArrayTy Sizes; 7125 MapFlagsArrayTy Types; 7126 MapMappersArrayTy Mappers; 7127 StructNonContiguousInfo NonContigInfo; 7128 7129 /// Append arrays in \a CurInfo. 
7130 void append(MapCombinedInfoTy &CurInfo) { 7131 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7132 BasePointers.append(CurInfo.BasePointers.begin(), 7133 CurInfo.BasePointers.end()); 7134 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7135 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7136 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7137 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7138 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7139 CurInfo.NonContigInfo.Dims.end()); 7140 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7141 CurInfo.NonContigInfo.Offsets.end()); 7142 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7143 CurInfo.NonContigInfo.Counts.end()); 7144 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7145 CurInfo.NonContigInfo.Strides.end()); 7146 } 7147 }; 7148 7149 /// Map between a struct and the its lowest & highest elements which have been 7150 /// mapped. 7151 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7152 /// HE(FieldIndex, Pointer)} 7153 struct StructRangeInfoTy { 7154 MapCombinedInfoTy PreliminaryMapData; 7155 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7156 0, Address::invalid()}; 7157 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7158 0, Address::invalid()}; 7159 Address Base = Address::invalid(); 7160 Address LB = Address::invalid(); 7161 bool IsArraySection = false; 7162 bool HasCompleteRecord = false; 7163 }; 7164 7165 private: 7166 /// Kind that defines how a device pointer has to be returned. 
7167 struct MapInfo { 7168 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7169 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7170 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7171 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7172 bool ReturnDevicePointer = false; 7173 bool IsImplicit = false; 7174 const ValueDecl *Mapper = nullptr; 7175 const Expr *VarRef = nullptr; 7176 bool ForDeviceAddr = false; 7177 7178 MapInfo() = default; 7179 MapInfo( 7180 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7181 OpenMPMapClauseKind MapType, 7182 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7183 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7184 bool ReturnDevicePointer, bool IsImplicit, 7185 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7186 bool ForDeviceAddr = false) 7187 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7188 MotionModifiers(MotionModifiers), 7189 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7190 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7191 }; 7192 7193 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7194 /// member and there is no map information about it, then emission of that 7195 /// entry is deferred until the whole struct has been processed. 7196 struct DeferredDevicePtrEntryTy { 7197 const Expr *IE = nullptr; 7198 const ValueDecl *VD = nullptr; 7199 bool ForDeviceAddr = false; 7200 7201 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7202 bool ForDeviceAddr) 7203 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7204 }; 7205 7206 /// The target directive from where the mappable clauses were extracted. It 7207 /// is either a executable directive or a user-defined mapper directive. 7208 llvm::PointerUnion<const OMPExecutableDirective *, 7209 const OMPDeclareMapperDecl *> 7210 CurDir; 7211 7212 /// Function the directive is being generated for. 
7213 CodeGenFunction &CGF; 7214 7215 /// Set of all first private variables in the current directive. 7216 /// bool data is set to true if the variable is implicitly marked as 7217 /// firstprivate, false otherwise. 7218 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7219 7220 /// Map between device pointer declarations and their expression components. 7221 /// The key value for declarations in 'this' is null. 7222 llvm::DenseMap< 7223 const ValueDecl *, 7224 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7225 DevPointersMap; 7226 7227 llvm::Value *getExprTypeSize(const Expr *E) const { 7228 QualType ExprTy = E->getType().getCanonicalType(); 7229 7230 // Calculate the size for array shaping expression. 7231 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7232 llvm::Value *Size = 7233 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7234 for (const Expr *SE : OAE->getDimensions()) { 7235 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7236 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7237 CGF.getContext().getSizeType(), 7238 SE->getExprLoc()); 7239 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7240 } 7241 return Size; 7242 } 7243 7244 // Reference types are ignored for mapping purposes. 7245 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7246 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7247 7248 // Given that an array section is considered a built-in type, we need to 7249 // do the calculation based on the length of the section instead of relying 7250 // on CGF.getTypeSize(E->getType()). 7251 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7252 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7253 OAE->getBase()->IgnoreParenImpCasts()) 7254 .getCanonicalType(); 7255 7256 // If there is no length associated with the expression and lower bound is 7257 // not specified too, that means we are using the whole length of the 7258 // base. 
7259 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7260 !OAE->getLowerBound()) 7261 return CGF.getTypeSize(BaseTy); 7262 7263 llvm::Value *ElemSize; 7264 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7265 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7266 } else { 7267 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7268 assert(ATy && "Expecting array type if not a pointer type."); 7269 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7270 } 7271 7272 // If we don't have a length at this point, that is because we have an 7273 // array section with a single element. 7274 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7275 return ElemSize; 7276 7277 if (const Expr *LenExpr = OAE->getLength()) { 7278 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7279 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7280 CGF.getContext().getSizeType(), 7281 LenExpr->getExprLoc()); 7282 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7283 } 7284 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7285 OAE->getLowerBound() && "expected array_section[lb:]."); 7286 // Size = sizetype - lb * elemtype; 7287 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7288 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7289 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7290 CGF.getContext().getSizeType(), 7291 OAE->getLowerBound()->getExprLoc()); 7292 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7293 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7294 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7295 LengthVal = CGF.Builder.CreateSelect( 7296 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7297 return LengthVal; 7298 } 7299 return CGF.getTypeSize(ExprTy); 7300 } 7301 7302 /// Return the corresponding bits for a given map clause modifier. 
Add 7303 /// a flag marking the map as a pointer if requested. Add a flag marking the 7304 /// map as the first one of a series of maps that relate to the same map 7305 /// expression. 7306 OpenMPOffloadMappingFlags getMapTypeBits( 7307 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7308 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7309 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7310 OpenMPOffloadMappingFlags Bits = 7311 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7312 switch (MapType) { 7313 case OMPC_MAP_alloc: 7314 case OMPC_MAP_release: 7315 // alloc and release is the default behavior in the runtime library, i.e. 7316 // if we don't pass any bits alloc/release that is what the runtime is 7317 // going to do. Therefore, we don't need to signal anything for these two 7318 // type modifiers. 7319 break; 7320 case OMPC_MAP_to: 7321 Bits |= OMP_MAP_TO; 7322 break; 7323 case OMPC_MAP_from: 7324 Bits |= OMP_MAP_FROM; 7325 break; 7326 case OMPC_MAP_tofrom: 7327 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7328 break; 7329 case OMPC_MAP_delete: 7330 Bits |= OMP_MAP_DELETE; 7331 break; 7332 case OMPC_MAP_unknown: 7333 llvm_unreachable("Unexpected map type!"); 7334 } 7335 if (AddPtrFlag) 7336 Bits |= OMP_MAP_PTR_AND_OBJ; 7337 if (AddIsTargetParamFlag) 7338 Bits |= OMP_MAP_TARGET_PARAM; 7339 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7340 != MapModifiers.end()) 7341 Bits |= OMP_MAP_ALWAYS; 7342 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7343 != MapModifiers.end()) 7344 Bits |= OMP_MAP_CLOSE; 7345 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7346 MapModifiers.end() || 7347 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7348 MotionModifiers.end()) 7349 Bits |= OMP_MAP_PRESENT; 7350 if (IsNonContiguous) 7351 Bits |= OMP_MAP_NON_CONTIG; 7352 return Bits; 7353 } 7354 7355 /// Return true if the provided expression is a final array section. 
A 7356 /// final array section, is one whose length can't be proved to be one. 7357 bool isFinalArraySectionExpression(const Expr *E) const { 7358 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7359 7360 // It is not an array section and therefore not a unity-size one. 7361 if (!OASE) 7362 return false; 7363 7364 // An array section with no colon always refer to a single element. 7365 if (OASE->getColonLocFirst().isInvalid()) 7366 return false; 7367 7368 const Expr *Length = OASE->getLength(); 7369 7370 // If we don't have a length we have to check if the array has size 1 7371 // for this dimension. Also, we should always expect a length if the 7372 // base type is pointer. 7373 if (!Length) { 7374 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7375 OASE->getBase()->IgnoreParenImpCasts()) 7376 .getCanonicalType(); 7377 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7378 return ATy->getSize().getSExtValue() != 1; 7379 // If we don't have a constant dimension length, we have to consider 7380 // the current section as having any size, so it is not necessarily 7381 // unitary. If it happen to be unity size, that's user fault. 7382 return true; 7383 } 7384 7385 // Check if the length evaluates to 1. 7386 Expr::EvalResult Result; 7387 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7388 return true; // Can have more that size 1. 7389 7390 llvm::APSInt ConstLength = Result.Val.getInt(); 7391 return ConstLength.getSExtValue() != 1; 7392 } 7393 7394 /// Generate the base pointers, section pointers, sizes, map type bits, and 7395 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7396 /// map type, map or motion modifiers, and expression components. 7397 /// \a IsFirstComponent should be set to true if the provided set of 7398 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
    bool IsFirstComponentList, bool IsImplicit,
    const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
    const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
  // in unified shared memory mode or for local pointers
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  // Set when the base is a dereferenced pointer whose load is postponed (see
  // the last branch below); the load is emitted later, once it is known
  // whether a member expression or another entry follows.
  bool FirstPointerInComplexData = false;
  Address BP = Address::invalid();
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  // Determine the base pointer BP from the kind of the first (base) component.
  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    // Subscript or array section applied directly to 'this'.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    // Array shaping applied directly to 'this': the base is the pointer value.
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      // No need to generate individual map information for the pointer, it
      // can be associated with the combined storage if shared memory mode is
      // active or the base declaration is not a global variable.
      const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
      if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
          !VD || VD->hasLocalStorage())
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
      else
        FirstPointerInComplexData = true;
      ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  // Track the total number of dimensions. Start from one for the dummy
  // dimension.
  uint64_t DimSize = 1;

  bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME) {
        ShouldBeMemberOf = true;
        // Do not emit as complex pointer if this is actually not an
        // array-like expression.
        if (FirstPointerInComplexData) {
          QualType Ty = std::prev(I)
                            ->getAssociatedDeclaration()
                            ->getType()
                            .getNonReferenceType();
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
          FirstPointerInComplexData = false;
        }
      }
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section is one whose length can't be proved to be one.
    // If the map item is non-contiguous then we don't treat any array section
    // as final array section.
    bool IsFinalArraySection =
        !IsNonContiguous &&
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // If we have a declaration for the mapping use that, otherwise use
    // the base declaration of the map clause.
    const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                   ? I->getAssociatedDeclaration()
                                   : BaseDecl;

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // NOTE: OASE and OAShE deliberately shadow the outer variables of the same
    // name; inside the loop they refer to the current component.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

    if (OASE)
      ++DimSize;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the address of the current component (the "section pointer").
      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointerOrAddr =
          (IsPointer || ForDeviceAddr) && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty() && Next == CE) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last byte of the object.
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        PartialStruct.LB = LB;
        assert(
            PartialStruct.PreliminaryMapData.BasePointers.empty() &&
            "Overlapped elements must be used only once for the variable.");
        // Park what has been collected so far in PreliminaryMapData; the
        // entries emitted below go into the now-swapped CombinedInfo.
        std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false, IsNonContiguous);
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              // Bytes between the current position and the overlapped
              // element.
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          assert(Size && "Failed to determine structure size");
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          // Continue right after the overlapped element.
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Trailing chunk: from the last overlapped element to one past HB.
        CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.Types.push_back(Flags);
        CombinedInfo.Mappers.push_back(nullptr);
        CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                  : 1);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointerOrAddr ||
          (Next == CE && MapType != OMPC_MAP_unknown)) {
        CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                  : 1);

        // If Mapper is valid, the last component inherits the mapper.
        bool HasMapper = Mapper && Next == CE;
        CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, MotionModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference ||
                FirstPointerInComplexData,
            IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        CombinedInfo.Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct.
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress(CGF);
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
          PartialStruct.Base = BP;
          PartialStruct.LB = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // Need to emit combined struct for array sections.
      if (IsFinalArraySection || IsNonContiguous)
        PartialStruct.IsArraySection = true;

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      FirstPointerInComplexData = false;
    } else if (FirstPointerInComplexData) {
      // No entry is generated for this component; emit the postponed load of
      // the base pointer now.
      QualType Ty = Components.rbegin()
                        ->getAssociatedDeclaration()
                        ->getType()
                        .getNonReferenceType();
      BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
      FirstPointerInComplexData = false;
    }
  }
  // If we ran into the whole component - allocate the space for the whole
  // record.
  if (!EncounteredME)
    PartialStruct.HasCompleteRecord = true;

  // Everything below only collects offset/count/stride information for
  // non-contiguous items.
  if (!IsNonContiguous)
    return;

  const ASTContext &Context = CGF.getContext();

  // For supporting stride in array section, we need to initialize the first
  // dimension size as 1, first offset as 0, and first count as 1.
  MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
  MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  MapValuesArrayTy CurStrides;
  MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  // NOTE(review): ElementTypeSize is only assigned inside the loop below, when
  // an array-typed (constant or variable) base is found, but it is read
  // unconditionally afterwards to seed DimProd. Confirm that every
  // non-contiguous component list reaches the assignment.
  uint64_t ElementTypeSize;

  // Collect Size information for each dimension and get the element size as
  // the first Stride. For example, for `int arr[10][10]`, the DimSizes
  // should be [10, 10] and the first stride is 4 bytes.
  for (const OMPClauseMappableExprCommon::MappableComponent &Component :
       Components) {
    const Expr *AssocExpr = Component.getAssociatedExpression();
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (!OASE)
      continue;

    QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
    auto *CAT = Context.getAsConstantArrayType(Ty);
    auto *VAT = Context.getAsVariableArrayType(Ty);

    // We need all the dimension size except for the last dimension.
    assert((VAT || CAT || &Component == &*Components.begin()) &&
           "Should be either ConstantArray or VariableArray if not the "
           "first Component");

    // Get element size if CurStrides is empty.
    if (CurStrides.empty()) {
      const Type *ElementType = nullptr;
      if (CAT)
        ElementType = CAT->getElementType().getTypePtr();
      else if (VAT)
        ElementType = VAT->getElementType().getTypePtr();
      else
        assert(&Component == &*Components.begin() &&
               "Only expect pointer (non CAT or VAT) when this is the "
               "first Component");
      // If ElementType is null, then it means the base is a pointer
      // (neither CAT nor VAT) and we'll attempt to get ElementType again
      // for next iteration.
      if (ElementType) {
        // For the case that having pointer as base, we need to remove one
        // level of indirection.
        if (&Component != &*Components.begin())
          ElementType = ElementType->getPointeeOrArrayElementType();
        ElementTypeSize =
            Context.getTypeSizeInChars(ElementType).getQuantity();
        CurStrides.push_back(
            llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
      }
    }
    // Get dimension value except for the last dimension since we don't need
    // it.
    if (DimSizes.size() < Components.size() - 1) {
      if (CAT)
        DimSizes.push_back(llvm::ConstantInt::get(
            CGF.Int64Ty, CAT->getSize().getZExtValue()));
      else if (VAT)
        DimSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
            /*IsSigned=*/false));
    }
  }

  // Skip the dummy dimension since we already have its information.
  auto DI = DimSizes.begin() + 1;
  // Product of dimension.
  llvm::Value *DimProd =
      llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

  // Collect info for non-contiguous. Notice that offset, count, and stride
  // are only meaningful for array-section, so we insert a null for anything
  // other than array-section.
  // Also, the size of offset, count, and stride are not the same as
  // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
  // count, and stride are the same as the number of non-contiguous
  // declaration in target update to/from clause.
  for (const OMPClauseMappableExprCommon::MappableComponent &Component :
       Components) {
    const Expr *AssocExpr = Component.getAssociatedExpression();

    if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
      // A plain subscript is a dimension with count 1 at the given index.
      llvm::Value *Offset = CGF.Builder.CreateIntCast(
          CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
          /*isSigned=*/false);
      CurOffsets.push_back(Offset);
      CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
      // NOTE(review): relies on CurStrides being non-empty here - confirm.
      CurStrides.push_back(CurStrides.back());
      continue;
    }

    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (!OASE)
      continue;

    // Offset
    const Expr *OffsetExpr = OASE->getLowerBound();
    llvm::Value *Offset = nullptr;
    if (!OffsetExpr) {
      // If offset is absent, then we just set it to zero.
      Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
    } else {
      Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                         CGF.Int64Ty,
                                         /*isSigned=*/false);
    }
    CurOffsets.push_back(Offset);

    // Count
    const Expr *CountExpr = OASE->getLength();
    llvm::Value *Count = nullptr;
    if (!CountExpr) {
      // In Clang, once a high dimension is an array section, we construct all
      // the lower dimensions as array sections. However, for a case like
      // arr[0:2][2], Clang constructs the inner dimension as an array section
      // even though it is not in array section form according to the spec.
      if (!OASE->getColonLocFirst().isValid() &&
          !OASE->getColonLocSecond().isValid()) {
        Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
      } else {
        // OpenMP 5.0, 2.1.5 Array Sections, Description.
        // When the length is absent it defaults to ⌈(size −
        // lower-bound)/stride⌉, where size is the size of the array
        // dimension.
        const Expr *StrideExpr = OASE->getStride();
        llvm::Value *Stride =
            StrideExpr
                ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                            CGF.Int64Ty, /*isSigned=*/false)
                : nullptr;
        if (Stride)
          Count = CGF.Builder.CreateUDiv(
              CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
        else
          Count = CGF.Builder.CreateNUWSub(*DI, Offset);
      }
    } else {
      Count = CGF.EmitScalarExpr(CountExpr);
    }
    Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
    CurCounts.push_back(Count);

    // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
    // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
    //              Offset      Count     Stride
    //    D0          0           1         4    (int)    <- dummy dimension
    //    D1          0           2         8    (2 * (1) * 4)
    //    D2          1           2         20   (1 * (1 * 5) * 4)
    //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
    const Expr *StrideExpr = OASE->getStride();
    llvm::Value *Stride =
        StrideExpr
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                        CGF.Int64Ty, /*isSigned=*/false)
            : nullptr;
    DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
    if (Stride)
      CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
    else
      CurStrides.push_back(DimProd);
    if (DI != DimSizes.end())
      ++DI;
  }

  CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
  CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
  CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
///
/// For a capture whose variable is not recorded in FirstPrivateDecls the
/// result is TO | FROM.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant-qualified variable captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointer-typed variable: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Rotate by getFlagMemberOffset() bits.
8138 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8139 << getFlagMemberOffset()); 8140 } 8141 8142 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8143 OpenMPOffloadMappingFlags MemberOfFlag) { 8144 // If the entry is PTR_AND_OBJ but has not been marked with the special 8145 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8146 // marked as MEMBER_OF. 8147 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8148 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8149 return; 8150 8151 // Reset the placeholder value to prepare the flag for the assignment of the 8152 // proper MEMBER_OF value. 8153 Flags &= ~OMP_MAP_MEMBER_OF; 8154 Flags |= MemberOfFlag; 8155 } 8156 8157 void getPlainLayout(const CXXRecordDecl *RD, 8158 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8159 bool AsBase) const { 8160 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8161 8162 llvm::StructType *St = 8163 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8164 8165 unsigned NumElements = St->getNumElements(); 8166 llvm::SmallVector< 8167 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8168 RecordLayout(NumElements); 8169 8170 // Fill bases. 8171 for (const auto &I : RD->bases()) { 8172 if (I.isVirtual()) 8173 continue; 8174 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8175 // Ignore empty bases. 8176 if (Base->isEmpty() || CGF.getContext() 8177 .getASTRecordLayout(Base) 8178 .getNonVirtualSize() 8179 .isZero()) 8180 continue; 8181 8182 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8183 RecordLayout[FieldIndex] = Base; 8184 } 8185 // Fill in virtual bases. 8186 for (const auto &I : RD->vbases()) { 8187 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8188 // Ignore empty bases. 
8189 if (Base->isEmpty()) 8190 continue; 8191 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8192 if (RecordLayout[FieldIndex]) 8193 continue; 8194 RecordLayout[FieldIndex] = Base; 8195 } 8196 // Fill in all the fields. 8197 assert(!RD->isUnion() && "Unexpected union."); 8198 for (const auto *Field : RD->fields()) { 8199 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8200 // will fill in later.) 8201 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8202 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8203 RecordLayout[FieldIndex] = Field; 8204 } 8205 } 8206 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8207 &Data : RecordLayout) { 8208 if (Data.isNull()) 8209 continue; 8210 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8211 getPlainLayout(Base, Layout, /*AsBase=*/true); 8212 else 8213 Layout.push_back(Data.get<const FieldDecl *>()); 8214 } 8215 } 8216 8217 /// Generate all the base pointers, section pointers, sizes, map types, and 8218 /// mappers for the extracted mappable expressions (all included in \a 8219 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8220 /// pair of the relevant declaration and index where it occurs is appended to 8221 /// the device pointers info array. 8222 void generateAllInfoForClauses( 8223 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8224 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8225 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8226 // We have to process the component lists that relate with the same 8227 // declaration in a single chunk so that we can generate the map flags 8228 // correctly. Therefore, we organize all lists in a map. 
8229 enum MapKind { Present, Allocs, Other, Total }; 8230 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8231 SmallVector<SmallVector<MapInfo, 8>, 4>> 8232 Info; 8233 8234 // Helper function to fill the information map for the different supported 8235 // clauses. 8236 auto &&InfoGen = 8237 [&Info, &SkipVarSet]( 8238 const ValueDecl *D, MapKind Kind, 8239 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8240 OpenMPMapClauseKind MapType, 8241 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8242 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8243 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8244 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8245 if (SkipVarSet.contains(D)) 8246 return; 8247 auto It = Info.find(D); 8248 if (It == Info.end()) 8249 It = Info 8250 .insert(std::make_pair( 8251 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8252 .first; 8253 It->second[Kind].emplace_back( 8254 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8255 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8256 }; 8257 8258 for (const auto *Cl : Clauses) { 8259 const auto *C = dyn_cast<OMPMapClause>(Cl); 8260 if (!C) 8261 continue; 8262 MapKind Kind = Other; 8263 if (!C->getMapTypeModifiers().empty() && 8264 llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) { 8265 return K == OMPC_MAP_MODIFIER_present; 8266 })) 8267 Kind = Present; 8268 else if (C->getMapType() == OMPC_MAP_alloc) 8269 Kind = Allocs; 8270 const auto *EI = C->getVarRefs().begin(); 8271 for (const auto L : C->component_lists()) { 8272 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8273 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8274 C->getMapTypeModifiers(), llvm::None, 8275 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8276 E); 8277 ++EI; 8278 } 8279 } 8280 for (const auto *Cl : Clauses) { 8281 const auto *C = dyn_cast<OMPToClause>(Cl); 8282 if (!C) 8283 continue; 8284 MapKind Kind = Other; 8285 if (!C->getMotionModifiers().empty() && 8286 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8287 return K == OMPC_MOTION_MODIFIER_present; 8288 })) 8289 Kind = Present; 8290 const auto *EI = C->getVarRefs().begin(); 8291 for (const auto L : C->component_lists()) { 8292 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8293 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8294 C->isImplicit(), std::get<2>(L), *EI); 8295 ++EI; 8296 } 8297 } 8298 for (const auto *Cl : Clauses) { 8299 const auto *C = dyn_cast<OMPFromClause>(Cl); 8300 if (!C) 8301 continue; 8302 MapKind Kind = Other; 8303 if (!C->getMotionModifiers().empty() && 8304 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8305 return K == OMPC_MOTION_MODIFIER_present; 8306 })) 8307 Kind = Present; 8308 const auto *EI = C->getVarRefs().begin(); 8309 for (const auto L : C->component_lists()) { 8310 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8311 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8312 C->isImplicit(), std::get<2>(L), *EI); 8313 ++EI; 8314 } 8315 } 8316 8317 // Look at the use_device_ptr clause information and mark the existing map 8318 // entries as such. If there is no map information for an entry in the 8319 // use_device_ptr list, we create one with map type 'alloc' and zero size 8320 // section. It is the user fault if that was not mapped before. If there is 8321 // no map information and the pointer is a struct member, then we defer the 8322 // emission of that entry until the whole struct has been processed. 
8323 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8324 SmallVector<DeferredDevicePtrEntryTy, 4>> 8325 DeferredInfo; 8326 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8327 8328 for (const auto *Cl : Clauses) { 8329 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8330 if (!C) 8331 continue; 8332 for (const auto L : C->component_lists()) { 8333 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8334 std::get<1>(L); 8335 assert(!Components.empty() && 8336 "Not expecting empty list of components!"); 8337 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8338 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8339 const Expr *IE = Components.back().getAssociatedExpression(); 8340 // If the first component is a member expression, we have to look into 8341 // 'this', which maps to null in the map of map information. Otherwise 8342 // look directly for the information. 8343 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8344 8345 // We potentially have map information for this declaration already. 8346 // Look for the first set of components that refer to it. 8347 if (It != Info.end()) { 8348 bool Found = false; 8349 for (auto &Data : It->second) { 8350 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8351 return MI.Components.back().getAssociatedDeclaration() == VD; 8352 }); 8353 // If we found a map entry, signal that the pointer has to be 8354 // returned and move on to the next declaration. Exclude cases where 8355 // the base pointer is mapped as array subscript, array section or 8356 // array shaping. The base address is passed as a pointer to base in 8357 // this case and cannot be used as a base for use_device_ptr list 8358 // item. 
8359 if (CI != Data.end()) { 8360 auto PrevCI = std::next(CI->Components.rbegin()); 8361 const auto *VarD = dyn_cast<VarDecl>(VD); 8362 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8363 isa<MemberExpr>(IE) || 8364 !VD->getType().getNonReferenceType()->isPointerType() || 8365 PrevCI == CI->Components.rend() || 8366 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8367 VarD->hasLocalStorage()) { 8368 CI->ReturnDevicePointer = true; 8369 Found = true; 8370 break; 8371 } 8372 } 8373 } 8374 if (Found) 8375 continue; 8376 } 8377 8378 // We didn't find any match in our map information - generate a zero 8379 // size array section - if the pointer is a struct member we defer this 8380 // action until the whole struct has been processed. 8381 if (isa<MemberExpr>(IE)) { 8382 // Insert the pointer into Info to be processed by 8383 // generateInfoForComponentList. Because it is a member pointer 8384 // without a pointee, no entry will be generated for it, therefore 8385 // we need to generate one after the whole struct has been processed. 8386 // Nonetheless, generateInfoForComponentList must be called to take 8387 // the pointer into account for the calculation of the range of the 8388 // partial struct. 
8389 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8390 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8391 nullptr); 8392 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8393 } else { 8394 llvm::Value *Ptr = 8395 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8396 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8397 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8398 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8399 UseDevicePtrCombinedInfo.Sizes.push_back( 8400 llvm::Constant::getNullValue(CGF.Int64Ty)); 8401 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8402 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8403 } 8404 } 8405 } 8406 8407 // Look at the use_device_addr clause information and mark the existing map 8408 // entries as such. If there is no map information for an entry in the 8409 // use_device_addr list, we create one with map type 'alloc' and zero size 8410 // section. It is the user fault if that was not mapped before. If there is 8411 // no map information and the pointer is a struct member, then we defer the 8412 // emission of that entry until the whole struct has been processed. 8413 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8414 for (const auto *Cl : Clauses) { 8415 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8416 if (!C) 8417 continue; 8418 for (const auto L : C->component_lists()) { 8419 assert(!std::get<1>(L).empty() && 8420 "Not expecting empty list of components!"); 8421 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8422 if (!Processed.insert(VD).second) 8423 continue; 8424 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8425 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8426 // If the first component is a member expression, we have to look into 8427 // 'this', which maps to null in the map of map information. 
Otherwise 8428 // look directly for the information. 8429 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8430 8431 // We potentially have map information for this declaration already. 8432 // Look for the first set of components that refer to it. 8433 if (It != Info.end()) { 8434 bool Found = false; 8435 for (auto &Data : It->second) { 8436 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8437 return MI.Components.back().getAssociatedDeclaration() == VD; 8438 }); 8439 // If we found a map entry, signal that the pointer has to be 8440 // returned and move on to the next declaration. 8441 if (CI != Data.end()) { 8442 CI->ReturnDevicePointer = true; 8443 Found = true; 8444 break; 8445 } 8446 } 8447 if (Found) 8448 continue; 8449 } 8450 8451 // We didn't find any match in our map information - generate a zero 8452 // size array section - if the pointer is a struct member we defer this 8453 // action until the whole struct has been processed. 8454 if (isa<MemberExpr>(IE)) { 8455 // Insert the pointer into Info to be processed by 8456 // generateInfoForComponentList. Because it is a member pointer 8457 // without a pointee, no entry will be generated for it, therefore 8458 // we need to generate one after the whole struct has been processed. 8459 // Nonetheless, generateInfoForComponentList must be called to take 8460 // the pointer into account for the calculation of the range of the 8461 // partial struct. 
8462 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8463 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8464 nullptr, nullptr, /*ForDeviceAddr=*/true); 8465 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8466 } else { 8467 llvm::Value *Ptr; 8468 if (IE->isGLValue()) 8469 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8470 else 8471 Ptr = CGF.EmitScalarExpr(IE); 8472 CombinedInfo.Exprs.push_back(VD); 8473 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8474 CombinedInfo.Pointers.push_back(Ptr); 8475 CombinedInfo.Sizes.push_back( 8476 llvm::Constant::getNullValue(CGF.Int64Ty)); 8477 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8478 CombinedInfo.Mappers.push_back(nullptr); 8479 } 8480 } 8481 } 8482 8483 for (const auto &Data : Info) { 8484 StructRangeInfoTy PartialStruct; 8485 // Temporary generated information. 8486 MapCombinedInfoTy CurInfo; 8487 const Decl *D = Data.first; 8488 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8489 for (const auto &M : Data.second) { 8490 for (const MapInfo &L : M) { 8491 assert(!L.Components.empty() && 8492 "Not expecting declaration with no component lists."); 8493 8494 // Remember the current base pointer index. 8495 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8496 CurInfo.NonContigInfo.IsNonContiguous = 8497 L.Components.back().isNonContiguous(); 8498 generateInfoForComponentList( 8499 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8500 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8501 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8502 8503 // If this entry relates with a device pointer, set the relevant 8504 // declaration and add the 'return pointer' flag. 
8505 if (L.ReturnDevicePointer) { 8506 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8507 "Unexpected number of mapped base pointers."); 8508 8509 const ValueDecl *RelevantVD = 8510 L.Components.back().getAssociatedDeclaration(); 8511 assert(RelevantVD && 8512 "No relevant declaration related with device pointer??"); 8513 8514 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8515 RelevantVD); 8516 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8517 } 8518 } 8519 } 8520 8521 // Append any pending zero-length pointers which are struct members and 8522 // used with use_device_ptr or use_device_addr. 8523 auto CI = DeferredInfo.find(Data.first); 8524 if (CI != DeferredInfo.end()) { 8525 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8526 llvm::Value *BasePtr; 8527 llvm::Value *Ptr; 8528 if (L.ForDeviceAddr) { 8529 if (L.IE->isGLValue()) 8530 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8531 else 8532 Ptr = this->CGF.EmitScalarExpr(L.IE); 8533 BasePtr = Ptr; 8534 // Entry is RETURN_PARAM. Also, set the placeholder value 8535 // MEMBER_OF=FFFF so that the entry is later updated with the 8536 // correct value of MEMBER_OF. 8537 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8538 } else { 8539 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8540 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8541 L.IE->getExprLoc()); 8542 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8543 // placeholder value MEMBER_OF=FFFF so that the entry is later 8544 // updated with the correct value of MEMBER_OF. 
8545 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8546 OMP_MAP_MEMBER_OF); 8547 } 8548 CurInfo.Exprs.push_back(L.VD); 8549 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8550 CurInfo.Pointers.push_back(Ptr); 8551 CurInfo.Sizes.push_back( 8552 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8553 CurInfo.Mappers.push_back(nullptr); 8554 } 8555 } 8556 // If there is an entry in PartialStruct it means we have a struct with 8557 // individual members mapped. Emit an extra combined entry. 8558 if (PartialStruct.Base.isValid()) { 8559 CurInfo.NonContigInfo.Dims.push_back(0); 8560 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8561 } 8562 8563 // We need to append the results of this capture to what we already 8564 // have. 8565 CombinedInfo.append(CurInfo); 8566 } 8567 // Append data for use_device_ptr clauses. 8568 CombinedInfo.append(UseDevicePtrCombinedInfo); 8569 } 8570 8571 public: 8572 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8573 : CurDir(&Dir), CGF(CGF) { 8574 // Extract firstprivate clause information. 8575 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8576 for (const auto *D : C->varlists()) 8577 FirstPrivateDecls.try_emplace( 8578 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8579 // Extract implicit firstprivates from uses_allocators clauses. 
8580 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8581 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8582 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8583 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8584 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8585 /*Implicit=*/true); 8586 else if (const auto *VD = dyn_cast<VarDecl>( 8587 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8588 ->getDecl())) 8589 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8590 } 8591 } 8592 // Extract device pointer clause information. 8593 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8594 for (auto L : C->component_lists()) 8595 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8596 } 8597 8598 /// Constructor for the declare mapper directive. 8599 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8600 : CurDir(&Dir), CGF(CGF) {} 8601 8602 /// Generate code for the combined entry if we have a partially mapped struct 8603 /// and take care of the mapping flags of the arguments corresponding to 8604 /// individual struct members. 
8605 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8606 MapFlagsArrayTy &CurTypes, 8607 const StructRangeInfoTy &PartialStruct, 8608 const ValueDecl *VD = nullptr, 8609 bool NotTargetParams = true) const { 8610 if (CurTypes.size() == 1 && 8611 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8612 !PartialStruct.IsArraySection) 8613 return; 8614 Address LBAddr = PartialStruct.LowestElem.second; 8615 Address HBAddr = PartialStruct.HighestElem.second; 8616 if (PartialStruct.HasCompleteRecord) { 8617 LBAddr = PartialStruct.LB; 8618 HBAddr = PartialStruct.LB; 8619 } 8620 CombinedInfo.Exprs.push_back(VD); 8621 // Base is the base of the struct 8622 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8623 // Pointer is the address of the lowest element 8624 llvm::Value *LB = LBAddr.getPointer(); 8625 CombinedInfo.Pointers.push_back(LB); 8626 // There should not be a mapper for a combined entry. 8627 CombinedInfo.Mappers.push_back(nullptr); 8628 // Size is (addr of {highest+1} element) - (addr of lowest element) 8629 llvm::Value *HB = HBAddr.getPointer(); 8630 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8631 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8632 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8633 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8634 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8635 /*isSigned=*/false); 8636 CombinedInfo.Sizes.push_back(Size); 8637 // Map type is always TARGET_PARAM, if generate info for captures. 8638 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8639 : OMP_MAP_TARGET_PARAM); 8640 // If any element has the present modifier, then make sure the runtime 8641 // doesn't attempt to allocate the struct. 
8642 if (CurTypes.end() != 8643 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8644 return Type & OMP_MAP_PRESENT; 8645 })) 8646 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8647 // Remove TARGET_PARAM flag from the first element 8648 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8649 8650 // All other current entries will be MEMBER_OF the combined entry 8651 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8652 // 0xFFFF in the MEMBER_OF field). 8653 OpenMPOffloadMappingFlags MemberOfFlag = 8654 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8655 for (auto &M : CurTypes) 8656 setCorrectMemberOfFlag(M, MemberOfFlag); 8657 } 8658 8659 /// Generate all the base pointers, section pointers, sizes, map types, and 8660 /// mappers for the extracted mappable expressions (all included in \a 8661 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8662 /// pair of the relevant declaration and index where it occurs is appended to 8663 /// the device pointers info array. 8664 void generateAllInfo( 8665 MapCombinedInfoTy &CombinedInfo, 8666 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8667 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8668 assert(CurDir.is<const OMPExecutableDirective *>() && 8669 "Expect a executable directive"); 8670 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8671 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8672 } 8673 8674 /// Generate all the base pointers, section pointers, sizes, map types, and 8675 /// mappers for the extracted map clauses of user-defined mapper (all included 8676 /// in \a CombinedInfo). 
8677 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8678 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8679 "Expect a declare mapper directive"); 8680 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8681 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 8682 } 8683 8684 /// Emit capture info for lambdas for variables captured by reference. 8685 void generateInfoForLambdaCaptures( 8686 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8687 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8688 const auto *RD = VD->getType() 8689 .getCanonicalType() 8690 .getNonReferenceType() 8691 ->getAsCXXRecordDecl(); 8692 if (!RD || !RD->isLambda()) 8693 return; 8694 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8695 LValue VDLVal = CGF.MakeAddrLValue( 8696 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8697 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8698 FieldDecl *ThisCapture = nullptr; 8699 RD->getCaptureFields(Captures, ThisCapture); 8700 if (ThisCapture) { 8701 LValue ThisLVal = 8702 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8703 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8704 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8705 VDLVal.getPointer(CGF)); 8706 CombinedInfo.Exprs.push_back(VD); 8707 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8708 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8709 CombinedInfo.Sizes.push_back( 8710 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8711 CGF.Int64Ty, /*isSigned=*/true)); 8712 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8713 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8714 CombinedInfo.Mappers.push_back(nullptr); 8715 } 8716 for (const LambdaCapture &LC : RD->captures()) { 8717 if (!LC.capturesVariable()) 8718 continue; 8719 const VarDecl *VD = 
LC.getCapturedVar(); 8720 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8721 continue; 8722 auto It = Captures.find(VD); 8723 assert(It != Captures.end() && "Found lambda capture without field."); 8724 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8725 if (LC.getCaptureKind() == LCK_ByRef) { 8726 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8727 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8728 VDLVal.getPointer(CGF)); 8729 CombinedInfo.Exprs.push_back(VD); 8730 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8731 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8732 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8733 CGF.getTypeSize( 8734 VD->getType().getCanonicalType().getNonReferenceType()), 8735 CGF.Int64Ty, /*isSigned=*/true)); 8736 } else { 8737 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8738 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8739 VDLVal.getPointer(CGF)); 8740 CombinedInfo.Exprs.push_back(VD); 8741 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8742 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8743 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8744 } 8745 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8746 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8747 CombinedInfo.Mappers.push_back(nullptr); 8748 } 8749 } 8750 8751 /// Set correct indices for lambdas captures. 8752 void adjustMemberOfForLambdaCaptures( 8753 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8754 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8755 MapFlagsArrayTy &Types) const { 8756 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8757 // Set correct member_of idx for all implicit lambda captures. 
if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap           The capture being mapped; must not capture a
  ///                      variable array type (asserted).
  /// \param Arg           The value passed for the capture; used directly as
  ///                      base pointer and pointer when the captured
  ///                      declaration is found in DevPointersMap.
  /// \param CombinedInfo  Receives the generated map entries.
  /// \param PartialStruct Forwarded unchanged to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A capture of 'this' is represented by a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list found for VD in the map clauses:
    // (components, map type, map-type modifiers, is-implicit, mapper,
    //  variable reference expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI starts at the clause's first variable reference and is advanced
      // once per component list visited for VD.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable ordering of the collected lists. LHS is placed before RHS when
    // LHS carries the 'present' modifier and RHS does not, or when RHS is an
    // 'alloc' map and LHS is not. Note that HasAllocs is computed from RHS and
    // HasAllocsR from LHS, while HasPresent/HasPresentR follow LHS/RHS.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base entry inside
    // DeclComponentLists; the value collects the component lists that overlap
    // with that base entry.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L only against the entries that follow it, so each unordered
      // pair is examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists in reverse, in lock-step, while they
        // agree on statement class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points at the first unmatched component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The list that was fully consumed is the base; the other list is
          // recorded as overlapping with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels from VD's canonical type until
      // getPointeeOrArrayElementType() returns the type itself.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of both lists (walking in reverse).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge on a field: order by field index when the
            // fields share a parent, otherwise by whichever field is found
            // first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared on every iteration, including for entries that were skipped
      // because they were already handled as overlapped bases above.
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to each of the Exprs, BasePointers,
  /// Pointers, Sizes, Types and Mappers lists of \a CombinedInfo. The new
  /// type entry always gets OMP_MAP_TARGET_PARAM and, unless a
  /// FirstPrivateDecls entry says otherwise, OMP_MAP_IMPLICIT.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      // The mapped size is that of the pointee of the field's pointer type.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A FirstPrivateDecls entry overrides the implicit flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // For a firstprivate reference to a pointer, the pointer entry is
          // the value loaded through the reference rather than CV itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the dimension descriptors for non-contiguous map entries.
///
/// For every entry of \c CombinedInfo.NonContigInfo.Dims that is not 1, a
/// temporary array of \c descriptor_dim records is created, filled from the
/// recorded offsets, counts and strides, and its address is stored into the
/// corresponding slot of \c Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  // I indexes Dims (and the pointers array); L indexes Offsets/Counts/Strides
  // and only advances for entries that are actually emitted.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Element II of the descriptor array is filled from the recorded
      // values at the mirrored position.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
///
/// When \a MapExprs has no declaration the builder's default source-location
/// string is returned. Otherwise the string is built from the presumed
/// location of the declaration and either the pretty-printed map expression
/// or, when there is no expression, the declaration's name.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      // Make sure the printed text has reached ExprName before it is read.
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }

  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CGF             Function in which the arrays are emitted.
/// \param CombinedInfo    Base pointers, pointers, sizes, map types, mappers
///                        and expressions of all entries to pass.
/// \param Info            Receives the emitted arrays and the entry count.
/// \param OMPBuilder      Used to create the map-name strings.
/// \param IsNonContiguous When true, entries flagged OMP_MAP_NON_CONTIG use
///                        their dimension count as size, and the
///                        non-contiguous descriptors are emitted at the end.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Temporaries holding the base pointers, the pointers and the mappers;
    // they are filled entry by entry in the loop further down.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // Non-contiguous entries carry their dimension count in the size
          // slot instead of a byte size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name string per mapped expression, in the order of
      // CombinedInfo.Exprs.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it has already been used to build the
      // begin-of-region map types array above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each entry's base pointer, pointer, (runtime) size and mapper
    // into slot I of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // The slot is cast to a pointer to the stored value's own type, so the
      // value itself is stored without a conversion.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where the base pointer of a device-pointer declaration was
      // stored, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at run time when the sizes array is a
      // temporary; otherwise it is the constant global built above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The descriptors are only needed when non-contiguous handling was
  // requested, offsets were recorded, and there is at least one entry.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Whether the arguments are emitted for the end-of-region runtime call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9409 static void emitOffloadingArraysArgument( 9410 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9411 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9412 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9413 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9414 const ArgumentsOptions &Options = ArgumentsOptions()) { 9415 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9416 "expected region end call to runtime only when end call is separate"); 9417 CodeGenModule &CGM = CGF.CGM; 9418 if (Info.NumberOfPtrs) { 9419 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9420 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9421 Info.BasePointersArray, 9422 /*Idx0=*/0, /*Idx1=*/0); 9423 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9424 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9425 Info.PointersArray, 9426 /*Idx0=*/0, 9427 /*Idx1=*/0); 9428 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9429 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9430 /*Idx0=*/0, /*Idx1=*/0); 9431 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9432 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9433 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9434 : Info.MapTypesArray, 9435 /*Idx0=*/0, 9436 /*Idx1=*/0); 9437 9438 // Only emit the mapper information arrays if debug information is 9439 // requested. 
9440 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9441 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9442 else 9443 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9444 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9445 Info.MapNamesArray, 9446 /*Idx0=*/0, 9447 /*Idx1=*/0); 9448 // If there is no user-defined mapper, set the mapper array to nullptr to 9449 // avoid an unnecessary data privatization 9450 if (!Info.HasMapper) 9451 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9452 else 9453 MappersArrayArg = 9454 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9455 } else { 9456 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9457 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9458 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9459 MapTypesArrayArg = 9460 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9461 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9462 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9463 } 9464 } 9465 9466 /// Check for inner distribute directive. 
9467 static const OMPExecutableDirective * 9468 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9469 const auto *CS = D.getInnermostCapturedStmt(); 9470 const auto *Body = 9471 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9472 const Stmt *ChildStmt = 9473 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9474 9475 if (const auto *NestedDir = 9476 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9477 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9478 switch (D.getDirectiveKind()) { 9479 case OMPD_target: 9480 if (isOpenMPDistributeDirective(DKind)) 9481 return NestedDir; 9482 if (DKind == OMPD_teams) { 9483 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9484 /*IgnoreCaptured=*/true); 9485 if (!Body) 9486 return nullptr; 9487 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9488 if (const auto *NND = 9489 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9490 DKind = NND->getDirectiveKind(); 9491 if (isOpenMPDistributeDirective(DKind)) 9492 return NND; 9493 } 9494 } 9495 return nullptr; 9496 case OMPD_target_teams: 9497 if (isOpenMPDistributeDirective(DKind)) 9498 return NestedDir; 9499 return nullptr; 9500 case OMPD_target_parallel: 9501 case OMPD_target_simd: 9502 case OMPD_target_parallel_for: 9503 case OMPD_target_parallel_for_simd: 9504 return nullptr; 9505 case OMPD_target_teams_distribute: 9506 case OMPD_target_teams_distribute_simd: 9507 case OMPD_target_teams_distribute_parallel_for: 9508 case OMPD_target_teams_distribute_parallel_for_simd: 9509 case OMPD_parallel: 9510 case OMPD_for: 9511 case OMPD_parallel_for: 9512 case OMPD_parallel_master: 9513 case OMPD_parallel_sections: 9514 case OMPD_for_simd: 9515 case OMPD_parallel_for_simd: 9516 case OMPD_cancel: 9517 case OMPD_cancellation_point: 9518 case OMPD_ordered: 9519 case OMPD_threadprivate: 9520 case OMPD_allocate: 9521 case OMPD_task: 9522 case OMPD_simd: 9523 case OMPD_tile: 9524 
case OMPD_sections: 9525 case OMPD_section: 9526 case OMPD_single: 9527 case OMPD_master: 9528 case OMPD_critical: 9529 case OMPD_taskyield: 9530 case OMPD_barrier: 9531 case OMPD_taskwait: 9532 case OMPD_taskgroup: 9533 case OMPD_atomic: 9534 case OMPD_flush: 9535 case OMPD_depobj: 9536 case OMPD_scan: 9537 case OMPD_teams: 9538 case OMPD_target_data: 9539 case OMPD_target_exit_data: 9540 case OMPD_target_enter_data: 9541 case OMPD_distribute: 9542 case OMPD_distribute_simd: 9543 case OMPD_distribute_parallel_for: 9544 case OMPD_distribute_parallel_for_simd: 9545 case OMPD_teams_distribute: 9546 case OMPD_teams_distribute_simd: 9547 case OMPD_teams_distribute_parallel_for: 9548 case OMPD_teams_distribute_parallel_for_simd: 9549 case OMPD_target_update: 9550 case OMPD_declare_simd: 9551 case OMPD_declare_variant: 9552 case OMPD_begin_declare_variant: 9553 case OMPD_end_declare_variant: 9554 case OMPD_declare_target: 9555 case OMPD_end_declare_target: 9556 case OMPD_declare_reduction: 9557 case OMPD_declare_mapper: 9558 case OMPD_taskloop: 9559 case OMPD_taskloop_simd: 9560 case OMPD_master_taskloop: 9561 case OMPD_master_taskloop_simd: 9562 case OMPD_parallel_master_taskloop: 9563 case OMPD_parallel_master_taskloop_simd: 9564 case OMPD_requires: 9565 case OMPD_unknown: 9566 default: 9567 llvm_unreachable("Unexpected directive."); 9568 } 9569 } 9570 9571 return nullptr; 9572 } 9573 9574 /// Emit the user-defined mapper function. The code generation follows the 9575 /// pattern in the example below. 9576 /// \code 9577 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9578 /// void *base, void *begin, 9579 /// int64_t size, int64_t type, 9580 /// void *name = nullptr) { 9581 /// // Allocate space for an array section first or add a base/begin for 9582 /// // pointer dereference. 
9583 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9584 /// !maptype.IsDelete) 9585 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9586 /// size*sizeof(Ty), clearToFromMember(type)); 9587 /// // Map members. 9588 /// for (unsigned i = 0; i < size; i++) { 9589 /// // For each component specified by this mapper: 9590 /// for (auto c : begin[i]->all_components) { 9591 /// if (c.hasMapper()) 9592 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9593 /// c.arg_type, c.arg_name); 9594 /// else 9595 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9596 /// c.arg_begin, c.arg_size, c.arg_type, 9597 /// c.arg_name); 9598 /// } 9599 /// } 9600 /// // Delete the array section. 9601 /// if (size > 1 && maptype.IsDelete) 9602 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9603 /// size*sizeof(Ty), clearToFromMember(type)); 9604 /// } 9605 /// \endcode 9606 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9607 CodeGenFunction *CGF) { 9608 if (UDMMap.count(D) > 0) 9609 return; 9610 ASTContext &C = CGM.getContext(); 9611 QualType Ty = D->getType(); 9612 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9613 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9614 auto *MapperVarDecl = 9615 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9616 SourceLocation Loc = D->getLocation(); 9617 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9618 9619 // Prepare mapper function arguments and attributes. 
9620 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9621 C.VoidPtrTy, ImplicitParamDecl::Other); 9622 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9623 ImplicitParamDecl::Other); 9624 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9625 C.VoidPtrTy, ImplicitParamDecl::Other); 9626 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9627 ImplicitParamDecl::Other); 9628 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9629 ImplicitParamDecl::Other); 9630 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9631 ImplicitParamDecl::Other); 9632 FunctionArgList Args; 9633 Args.push_back(&HandleArg); 9634 Args.push_back(&BaseArg); 9635 Args.push_back(&BeginArg); 9636 Args.push_back(&SizeArg); 9637 Args.push_back(&TypeArg); 9638 Args.push_back(&NameArg); 9639 const CGFunctionInfo &FnInfo = 9640 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9641 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9642 SmallString<64> TyStr; 9643 llvm::raw_svector_ostream Out(TyStr); 9644 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9645 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9646 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9647 Name, &CGM.getModule()); 9648 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9649 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9650 // Start the mapper function code generation. 9651 CodeGenFunction MapperCGF(CGM); 9652 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9653 // Compute the starting and end addresses of array elements. 9654 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9655 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9656 C.getPointerType(Int64Ty), Loc); 9657 // Prepare common arguments for array initiation and deletion. 
9658 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9659 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9660 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9661 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9662 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9663 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9664 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9665 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9666 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9667 // Convert the size in bytes into the number of array elements. 9668 Size = MapperCGF.Builder.CreateExactUDiv( 9669 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9670 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9671 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 9672 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9673 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9674 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9675 C.getPointerType(Int64Ty), Loc); 9676 9677 // Emit array initiation if this is an array section and \p MapType indicates 9678 // that memory allocation is required. 9679 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9680 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9681 ElementSize, HeadBB, /*IsInit=*/true); 9682 9683 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9684 9685 // Emit the loop header block. 9686 MapperCGF.EmitBlock(HeadBB); 9687 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9688 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9689 // Evaluate whether the initial condition is satisfied. 
9690 llvm::Value *IsEmpty = 9691 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9692 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9693 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9694 9695 // Emit the loop body block. 9696 MapperCGF.EmitBlock(BodyBB); 9697 llvm::BasicBlock *LastBB = BodyBB; 9698 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9699 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9700 PtrPHI->addIncoming(PtrBegin, EntryBB); 9701 Address PtrCurrent = 9702 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9703 .getAlignment() 9704 .alignmentOfArrayElement(ElementSize)); 9705 // Privatize the declared variable of mapper to be the current array element. 9706 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9707 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; }); 9708 (void)Scope.Privatize(); 9709 9710 // Get map clause information. Fill up the arrays with all mapped variables. 9711 MappableExprsHandler::MapCombinedInfoTy Info; 9712 MappableExprsHandler MEHandler(*D, MapperCGF); 9713 MEHandler.generateAllInfoForMapper(Info); 9714 9715 // Call the runtime API __tgt_mapper_num_components to get the number of 9716 // pre-existing components. 9717 llvm::Value *OffloadingArgs[] = {Handle}; 9718 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9719 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9720 OMPRTL___tgt_mapper_num_components), 9721 OffloadingArgs); 9722 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9723 PreviousSize, 9724 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9725 9726 // Fill up the runtime mapper handle for all components. 
9727 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9728 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9729 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9730 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9731 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9732 llvm::Value *CurSizeArg = Info.Sizes[I]; 9733 llvm::Value *CurNameArg = 9734 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9735 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 9736 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 9737 9738 // Extract the MEMBER_OF field from the map type. 9739 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9740 llvm::Value *MemberMapType = 9741 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9742 9743 // Combine the map type inherited from user-defined mapper with that 9744 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9745 // bits of the \a MapType, which is the input argument of the mapper 9746 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9747 // bits of MemberMapType. 9748 // [OpenMP 5.0], 1.2.6. map-type decay. 
9749 // | alloc | to | from | tofrom | release | delete 9750 // ---------------------------------------------------------- 9751 // alloc | alloc | alloc | alloc | alloc | release | delete 9752 // to | alloc | to | alloc | to | release | delete 9753 // from | alloc | alloc | from | from | release | delete 9754 // tofrom | alloc | to | from | tofrom | release | delete 9755 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9756 MapType, 9757 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9758 MappableExprsHandler::OMP_MAP_FROM)); 9759 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9760 llvm::BasicBlock *AllocElseBB = 9761 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9762 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9763 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9764 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9765 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9766 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9767 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9768 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9769 MapperCGF.EmitBlock(AllocBB); 9770 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9771 MemberMapType, 9772 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9773 MappableExprsHandler::OMP_MAP_FROM))); 9774 MapperCGF.Builder.CreateBr(EndBB); 9775 MapperCGF.EmitBlock(AllocElseBB); 9776 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9777 LeftToFrom, 9778 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9779 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9780 // In case of to, clear OMP_MAP_FROM. 
9781 MapperCGF.EmitBlock(ToBB); 9782 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9783 MemberMapType, 9784 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9785 MapperCGF.Builder.CreateBr(EndBB); 9786 MapperCGF.EmitBlock(ToElseBB); 9787 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9788 LeftToFrom, 9789 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9790 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9791 // In case of from, clear OMP_MAP_TO. 9792 MapperCGF.EmitBlock(FromBB); 9793 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9794 MemberMapType, 9795 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9796 // In case of tofrom, do nothing. 9797 MapperCGF.EmitBlock(EndBB); 9798 LastBB = EndBB; 9799 llvm::PHINode *CurMapType = 9800 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9801 CurMapType->addIncoming(AllocMapType, AllocBB); 9802 CurMapType->addIncoming(ToMapType, ToBB); 9803 CurMapType->addIncoming(FromMapType, FromBB); 9804 CurMapType->addIncoming(MemberMapType, ToElseBB); 9805 9806 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9807 CurSizeArg, CurMapType, CurNameArg}; 9808 if (Info.Mappers[I]) { 9809 // Call the corresponding mapper function. 9810 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9811 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9812 assert(MapperFunc && "Expect a valid mapper function is available."); 9813 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9814 } else { 9815 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9816 // data structure. 9817 MapperCGF.EmitRuntimeCall( 9818 OMPBuilder.getOrCreateRuntimeFunction( 9819 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9820 OffloadingArgs); 9821 } 9822 } 9823 9824 // Update the pointer to point to the next element that needs to be mapped, 9825 // and check whether we have mapped all elements. 
9826 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9827 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9828 PtrPHI->addIncoming(PtrNext, LastBB); 9829 llvm::Value *IsDone = 9830 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9831 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9832 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9833 9834 MapperCGF.EmitBlock(ExitBB); 9835 // Emit array deletion if this is an array section and \p MapType indicates 9836 // that deletion is required. 9837 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9838 ElementSize, DoneBB, /*IsInit=*/false); 9839 9840 // Emit the function exit block. 9841 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9842 MapperCGF.FinishFunction(); 9843 UDMMap.try_emplace(D, Fn); 9844 if (CGF) { 9845 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9846 Decls.second.push_back(D); 9847 } 9848 } 9849 9850 /// Emit the array initialization or deletion portion for user-defined mapper 9851 /// code generation. First, it evaluates whether an array section is mapped and 9852 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9853 /// true, and \a MapType indicates to not delete this array, array 9854 /// initialization code is generated. If \a IsInit is false, and \a MapType 9855 /// indicates to not this array, array deletion code is generated. 9856 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9857 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9858 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9859 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9860 StringRef Prefix = IsInit ? ".init" : ".del"; 9861 9862 // Evaluate if this is an array section. 
9863 llvm::BasicBlock *BodyBB = 9864 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9865 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 9866 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9867 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9868 MapType, 9869 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9870 llvm::Value *DeleteCond; 9871 llvm::Value *Cond; 9872 if (IsInit) { 9873 // base != begin? 9874 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( 9875 MapperCGF.Builder.CreatePtrDiff(Base, Begin)); 9876 // IsPtrAndObj? 9877 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 9878 MapType, 9879 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 9880 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 9881 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 9882 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 9883 DeleteCond = MapperCGF.Builder.CreateIsNull( 9884 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9885 } else { 9886 Cond = IsArray; 9887 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9888 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9889 } 9890 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 9891 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 9892 9893 MapperCGF.EmitBlock(BodyBB); 9894 // Get the array size by multiplying element size and element number (i.e., \p 9895 // Size). 9896 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9897 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9898 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9899 // memory allocation/deletion purpose only. 
9900 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9901 MapType, 9902 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9903 MappableExprsHandler::OMP_MAP_FROM | 9904 MappableExprsHandler::OMP_MAP_MEMBER_OF))); 9905 llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9906 9907 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9908 // data structure. 9909 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 9910 ArraySize, MapTypeArg, MapNameArg}; 9911 MapperCGF.EmitRuntimeCall( 9912 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9913 OMPRTL___tgt_push_mapper_component), 9914 OffloadingArgs); 9915 } 9916 9917 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9918 const OMPDeclareMapperDecl *D) { 9919 auto I = UDMMap.find(D); 9920 if (I != UDMMap.end()) 9921 return I->second; 9922 emitUserDefinedMapper(D); 9923 return UDMMap.lookup(D); 9924 } 9925 9926 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9927 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9928 llvm::Value *DeviceID, 9929 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9930 const OMPLoopDirective &D)> 9931 SizeEmitter) { 9932 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9933 const OMPExecutableDirective *TD = &D; 9934 // Get nested teams distribute kind directive, if any. 
9935 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9936 TD = getNestedDistributeDirective(CGM.getContext(), D); 9937 if (!TD) 9938 return; 9939 const auto *LD = cast<OMPLoopDirective>(TD); 9940 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 9941 PrePostActionTy &) { 9942 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9943 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 9944 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 9945 CGF.EmitRuntimeCall( 9946 OMPBuilder.getOrCreateRuntimeFunction( 9947 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9948 Args); 9949 } 9950 }; 9951 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9952 } 9953 9954 void CGOpenMPRuntime::emitTargetCall( 9955 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9956 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9957 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9958 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9959 const OMPLoopDirective &D)> 9960 SizeEmitter) { 9961 if (!CGF.HaveInsertPoint()) 9962 return; 9963 9964 assert(OutlinedFn && "Invalid outlined function!"); 9965 9966 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 9967 D.hasClausesOfKind<OMPNowaitClause>(); 9968 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9969 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9970 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9971 PrePostActionTy &) { 9972 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9973 }; 9974 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9975 9976 CodeGenFunction::OMPTargetDataInfo InputInfo; 9977 llvm::Value *MapTypesArray = nullptr; 9978 llvm::Value *MapNamesArray = nullptr; 9979 // Fill up the pointer arrays and transfer execution to the device. 
9980 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9981 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 9982 &CapturedVars, 9983 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9984 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9985 // Reverse offloading is not supported, so just execute on the host. 9986 if (RequiresOuterTask) { 9987 CapturedVars.clear(); 9988 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9989 } 9990 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9991 return; 9992 } 9993 9994 // On top of the arrays that were filled up, the target offloading call 9995 // takes as arguments the device id as well as the host pointer. The host 9996 // pointer is used by the runtime library to identify the current target 9997 // region, so it only has to be unique and not necessarily point to 9998 // anything. It could be the pointer to the outlined function that 9999 // implements the target region, but we aren't using that so that the 10000 // compiler doesn't need to keep that, and could therefore inline the host 10001 // function if proven worthwhile during optimization. 10002 10003 // From this point on, we need to have an ID of the target region defined. 10004 assert(OutlinedFnID && "Invalid outlined function ID!"); 10005 10006 // Emit device ID if any. 10007 llvm::Value *DeviceID; 10008 if (Device.getPointer()) { 10009 assert((Device.getInt() == OMPC_DEVICE_unknown || 10010 Device.getInt() == OMPC_DEVICE_device_num) && 10011 "Expected device_num modifier."); 10012 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10013 DeviceID = 10014 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10015 } else { 10016 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10017 } 10018 10019 // Emit the number of elements in the offloading arrays. 
10020 llvm::Value *PointerNum = 10021 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10022 10023 // Return value of the runtime offloading call. 10024 llvm::Value *Return; 10025 10026 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10027 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10028 10029 // Source location for the ident struct 10030 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10031 10032 // Emit tripcount for the target loop-based directive. 10033 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10034 10035 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10036 // The target region is an outlined function launched by the runtime 10037 // via calls __tgt_target() or __tgt_target_teams(). 10038 // 10039 // __tgt_target() launches a target region with one team and one thread, 10040 // executing a serial region. This master thread may in turn launch 10041 // more threads within its team upon encountering a parallel region, 10042 // however, no additional teams can be launched on the device. 10043 // 10044 // __tgt_target_teams() launches a target region with one or more teams, 10045 // each with one or more threads. This call is required for target 10046 // constructs such as: 10047 // 'target teams' 10048 // 'target' / 'teams' 10049 // 'target teams distribute parallel for' 10050 // 'target parallel' 10051 // and so on. 10052 // 10053 // Note that on the host and CPU targets, the runtime implementation of 10054 // these calls simply call the outlined function without forking threads. 10055 // The outlined functions themselves have runtime calls to 10056 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10057 // the compiler in emitTeamsCall() and emitParallelCall(). 
10058 // 10059 // In contrast, on the NVPTX target, the implementation of 10060 // __tgt_target_teams() launches a GPU kernel with the requested number 10061 // of teams and threads so no additional calls to the runtime are required. 10062 if (NumTeams) { 10063 // If we have NumTeams defined this means that we have an enclosed teams 10064 // region. Therefore we also expect to have NumThreads defined. These two 10065 // values should be defined in the presence of a teams directive, 10066 // regardless of having any clauses associated. If the user is using teams 10067 // but no clauses, these two values will be the default that should be 10068 // passed to the runtime library - a 32-bit integer with the value zero. 10069 assert(NumThreads && "Thread limit expression should be available along " 10070 "with number of teams."); 10071 llvm::Value *OffloadingArgs[] = {RTLoc, 10072 DeviceID, 10073 OutlinedFnID, 10074 PointerNum, 10075 InputInfo.BasePointersArray.getPointer(), 10076 InputInfo.PointersArray.getPointer(), 10077 InputInfo.SizesArray.getPointer(), 10078 MapTypesArray, 10079 MapNamesArray, 10080 InputInfo.MappersArray.getPointer(), 10081 NumTeams, 10082 NumThreads}; 10083 Return = CGF.EmitRuntimeCall( 10084 OMPBuilder.getOrCreateRuntimeFunction( 10085 CGM.getModule(), HasNowait 10086 ? OMPRTL___tgt_target_teams_nowait_mapper 10087 : OMPRTL___tgt_target_teams_mapper), 10088 OffloadingArgs); 10089 } else { 10090 llvm::Value *OffloadingArgs[] = {RTLoc, 10091 DeviceID, 10092 OutlinedFnID, 10093 PointerNum, 10094 InputInfo.BasePointersArray.getPointer(), 10095 InputInfo.PointersArray.getPointer(), 10096 InputInfo.SizesArray.getPointer(), 10097 MapTypesArray, 10098 MapNamesArray, 10099 InputInfo.MappersArray.getPointer()}; 10100 Return = CGF.EmitRuntimeCall( 10101 OMPBuilder.getOrCreateRuntimeFunction( 10102 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10103 : OMPRTL___tgt_target_mapper), 10104 OffloadingArgs); 10105 } 10106 10107 // Check the error code and execute the host version if required. 10108 llvm::BasicBlock *OffloadFailedBlock = 10109 CGF.createBasicBlock("omp_offload.failed"); 10110 llvm::BasicBlock *OffloadContBlock = 10111 CGF.createBasicBlock("omp_offload.cont"); 10112 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10113 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10114 10115 CGF.EmitBlock(OffloadFailedBlock); 10116 if (RequiresOuterTask) { 10117 CapturedVars.clear(); 10118 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10119 } 10120 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10121 CGF.EmitBranch(OffloadContBlock); 10122 10123 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10124 }; 10125 10126 // Notify that the host version must be executed. 10127 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10128 RequiresOuterTask](CodeGenFunction &CGF, 10129 PrePostActionTy &) { 10130 if (RequiresOuterTask) { 10131 CapturedVars.clear(); 10132 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10133 } 10134 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10135 }; 10136 10137 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10138 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10139 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10140 // Fill up the arrays with all the captured variables. 10141 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10142 10143 // Get mappable expression information. 
10144 MappableExprsHandler MEHandler(D, CGF); 10145 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10146 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10147 10148 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10149 auto *CV = CapturedVars.begin(); 10150 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10151 CE = CS.capture_end(); 10152 CI != CE; ++CI, ++RI, ++CV) { 10153 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10154 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10155 10156 // VLA sizes are passed to the outlined region by copy and do not have map 10157 // information associated. 10158 if (CI->capturesVariableArrayType()) { 10159 CurInfo.Exprs.push_back(nullptr); 10160 CurInfo.BasePointers.push_back(*CV); 10161 CurInfo.Pointers.push_back(*CV); 10162 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10163 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10164 // Copy to the device as an argument. No need to retrieve it. 10165 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10166 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10167 MappableExprsHandler::OMP_MAP_IMPLICIT); 10168 CurInfo.Mappers.push_back(nullptr); 10169 } else { 10170 // If we have any information in the map clause, we use it, otherwise we 10171 // just do a default mapping. 10172 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10173 if (!CI->capturesThis()) 10174 MappedVarSet.insert(CI->getCapturedVar()); 10175 else 10176 MappedVarSet.insert(nullptr); 10177 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10178 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10179 // Generate correct mapping for variables captured by reference in 10180 // lambdas. 
10181 if (CI->capturesVariable()) 10182 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10183 CurInfo, LambdaPointers); 10184 } 10185 // We expect to have at least an element of information for this capture. 10186 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10187 "Non-existing map pointer for capture!"); 10188 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10189 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10190 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10191 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10192 "Inconsistent map information sizes!"); 10193 10194 // If there is an entry in PartialStruct it means we have a struct with 10195 // individual members mapped. Emit an extra combined entry. 10196 if (PartialStruct.Base.isValid()) { 10197 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10198 MEHandler.emitCombinedEntry( 10199 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10200 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10201 } 10202 10203 // We need to append the results of this capture to what we already have. 10204 CombinedInfo.append(CurInfo); 10205 } 10206 // Adjust MEMBER_OF flags for the lambdas captures. 10207 MEHandler.adjustMemberOfForLambdaCaptures( 10208 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10209 CombinedInfo.Types); 10210 // Map any list items in a map clause that were not captures because they 10211 // weren't referenced within the construct. 10212 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10213 10214 TargetDataInfo Info; 10215 // Fill up the arrays and create the arguments. 
10216 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10217 emitOffloadingArraysArgument( 10218 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10219 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10220 {/*ForEndTask=*/false}); 10221 10222 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10223 InputInfo.BasePointersArray = 10224 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10225 InputInfo.PointersArray = 10226 Address(Info.PointersArray, CGM.getPointerAlign()); 10227 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10228 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10229 MapTypesArray = Info.MapTypesArray; 10230 MapNamesArray = Info.MapNamesArray; 10231 if (RequiresOuterTask) 10232 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10233 else 10234 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10235 }; 10236 10237 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10238 CodeGenFunction &CGF, PrePostActionTy &) { 10239 if (RequiresOuterTask) { 10240 CodeGenFunction::OMPTargetDataInfo InputInfo; 10241 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10242 } else { 10243 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10244 } 10245 }; 10246 10247 // If we have a target function ID it means that we need to support 10248 // offloading, otherwise, just execute on the host. We need to execute on host 10249 // regardless of the conditional in the if clause if, e.g., the user do not 10250 // specify target triples. 
10251 if (OutlinedFnID) { 10252 if (IfCond) { 10253 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10254 } else { 10255 RegionCodeGenTy ThenRCG(TargetThenGen); 10256 ThenRCG(CGF); 10257 } 10258 } else { 10259 RegionCodeGenTy ElseRCG(TargetElseGen); 10260 ElseRCG(CGF); 10261 } 10262 } 10263 10264 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10265 StringRef ParentName) { 10266 if (!S) 10267 return; 10268 10269 // Codegen OMP target directives that offload compute to the device. 10270 bool RequiresDeviceCodegen = 10271 isa<OMPExecutableDirective>(S) && 10272 isOpenMPTargetExecutionDirective( 10273 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10274 10275 if (RequiresDeviceCodegen) { 10276 const auto &E = *cast<OMPExecutableDirective>(S); 10277 unsigned DeviceID; 10278 unsigned FileID; 10279 unsigned Line; 10280 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10281 FileID, Line); 10282 10283 // Is this a target region that should not be emitted as an entry point? If 10284 // so just signal we are done with this target region. 
10285 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10286 ParentName, Line)) 10287 return; 10288 10289 switch (E.getDirectiveKind()) { 10290 case OMPD_target: 10291 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10292 cast<OMPTargetDirective>(E)); 10293 break; 10294 case OMPD_target_parallel: 10295 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10296 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10297 break; 10298 case OMPD_target_teams: 10299 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10300 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10301 break; 10302 case OMPD_target_teams_distribute: 10303 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10304 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10305 break; 10306 case OMPD_target_teams_distribute_simd: 10307 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10308 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10309 break; 10310 case OMPD_target_parallel_for: 10311 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10312 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10313 break; 10314 case OMPD_target_parallel_for_simd: 10315 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10316 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10317 break; 10318 case OMPD_target_simd: 10319 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10320 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10321 break; 10322 case OMPD_target_teams_distribute_parallel_for: 10323 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10324 CGM, ParentName, 10325 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10326 break; 10327 case OMPD_target_teams_distribute_parallel_for_simd: 10328 CodeGenFunction:: 10329 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10330 CGM, ParentName, 10331 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10332 break; 10333 case OMPD_parallel: 10334 case OMPD_for: 10335 case OMPD_parallel_for: 10336 case OMPD_parallel_master: 10337 case OMPD_parallel_sections: 10338 case OMPD_for_simd: 10339 case OMPD_parallel_for_simd: 10340 case OMPD_cancel: 10341 case OMPD_cancellation_point: 10342 case OMPD_ordered: 10343 case OMPD_threadprivate: 10344 case OMPD_allocate: 10345 case OMPD_task: 10346 case OMPD_simd: 10347 case OMPD_tile: 10348 case OMPD_sections: 10349 case OMPD_section: 10350 case OMPD_single: 10351 case OMPD_master: 10352 case OMPD_critical: 10353 case OMPD_taskyield: 10354 case OMPD_barrier: 10355 case OMPD_taskwait: 10356 case OMPD_taskgroup: 10357 case OMPD_atomic: 10358 case OMPD_flush: 10359 case OMPD_depobj: 10360 case OMPD_scan: 10361 case OMPD_teams: 10362 case OMPD_target_data: 10363 case OMPD_target_exit_data: 10364 case OMPD_target_enter_data: 10365 case OMPD_distribute: 10366 case OMPD_distribute_simd: 10367 case OMPD_distribute_parallel_for: 10368 case OMPD_distribute_parallel_for_simd: 10369 case OMPD_teams_distribute: 10370 case OMPD_teams_distribute_simd: 10371 case OMPD_teams_distribute_parallel_for: 10372 case OMPD_teams_distribute_parallel_for_simd: 10373 case OMPD_target_update: 10374 case OMPD_declare_simd: 10375 case OMPD_declare_variant: 10376 case OMPD_begin_declare_variant: 10377 case OMPD_end_declare_variant: 10378 case OMPD_declare_target: 10379 case OMPD_end_declare_target: 10380 case OMPD_declare_reduction: 10381 case OMPD_declare_mapper: 10382 case OMPD_taskloop: 10383 case OMPD_taskloop_simd: 10384 case OMPD_master_taskloop: 10385 case OMPD_master_taskloop_simd: 10386 case OMPD_parallel_master_taskloop: 10387 case OMPD_parallel_master_taskloop_simd: 10388 case OMPD_requires: 10389 case OMPD_unknown: 10390 default: 10391 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10392 } 10393 return; 10394 } 10395 10396 if (const auto *E = 
dyn_cast<OMPExecutableDirective>(S)) { 10397 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10398 return; 10399 10400 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10401 return; 10402 } 10403 10404 // If this is a lambda function, look into its body. 10405 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10406 S = L->getBody(); 10407 10408 // Keep looking for target regions recursively. 10409 for (const Stmt *II : S->children()) 10410 scanForTargetRegionsFunctions(II, ParentName); 10411 } 10412 10413 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10414 // If emitting code for the host, we do not process FD here. Instead we do 10415 // the normal code generation. 10416 if (!CGM.getLangOpts().OpenMPIsDevice) { 10417 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10418 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10419 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10420 // Do not emit device_type(nohost) functions for the host. 10421 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10422 return true; 10423 } 10424 return false; 10425 } 10426 10427 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10428 // Try to detect target regions in the function. 10429 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10430 StringRef Name = CGM.getMangledName(GD); 10431 scanForTargetRegionsFunctions(FD->getBody(), Name); 10432 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10433 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10434 // Do not emit device_type(nohost) functions for the host. 10435 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10436 return true; 10437 } 10438 10439 // Do not to emit function if it is not marked as declare target. 
10440 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10441 AlreadyEmittedTargetDecls.count(VD) == 0; 10442 } 10443 10444 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10445 if (!CGM.getLangOpts().OpenMPIsDevice) 10446 return false; 10447 10448 // Check if there are Ctors/Dtors in this declaration and look for target 10449 // regions in it. We use the complete variant to produce the kernel name 10450 // mangling. 10451 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10452 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10453 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10454 StringRef ParentName = 10455 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10456 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10457 } 10458 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10459 StringRef ParentName = 10460 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10461 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10462 } 10463 } 10464 10465 // Do not to emit variable if it is not marked as declare target. 
10466 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10467 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10468 cast<VarDecl>(GD.getDecl())); 10469 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10470 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10471 HasRequiresUnifiedSharedMemory)) { 10472 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10473 return true; 10474 } 10475 return false; 10476 } 10477 10478 llvm::Constant * 10479 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10480 const VarDecl *VD) { 10481 assert(VD->getType().isConstant(CGM.getContext()) && 10482 "Expected constant variable."); 10483 StringRef VarName; 10484 llvm::Constant *Addr; 10485 llvm::GlobalValue::LinkageTypes Linkage; 10486 QualType Ty = VD->getType(); 10487 SmallString<128> Buffer; 10488 { 10489 unsigned DeviceID; 10490 unsigned FileID; 10491 unsigned Line; 10492 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10493 FileID, Line); 10494 llvm::raw_svector_ostream OS(Buffer); 10495 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10496 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10497 VarName = OS.str(); 10498 } 10499 Linkage = llvm::GlobalValue::InternalLinkage; 10500 Addr = 10501 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10502 getDefaultFirstprivateAddressSpace()); 10503 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10504 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10505 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10506 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10507 VarName, Addr, VarSize, 10508 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10509 return Addr; 10510 } 10511 10512 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10513 llvm::Constant *Addr) { 10514 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10515 
!CGM.getLangOpts().OpenMPIsDevice) 10516 return; 10517 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10518 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10519 if (!Res) { 10520 if (CGM.getLangOpts().OpenMPIsDevice) { 10521 // Register non-target variables being emitted in device code (debug info 10522 // may cause this). 10523 StringRef VarName = CGM.getMangledName(VD); 10524 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10525 } 10526 return; 10527 } 10528 // Register declare target variables. 10529 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10530 StringRef VarName; 10531 CharUnits VarSize; 10532 llvm::GlobalValue::LinkageTypes Linkage; 10533 10534 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10535 !HasRequiresUnifiedSharedMemory) { 10536 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10537 VarName = CGM.getMangledName(VD); 10538 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10539 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10540 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10541 } else { 10542 VarSize = CharUnits::Zero(); 10543 } 10544 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10545 // Temp solution to prevent optimizations of the internal variables. 
10546 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10547 std::string RefName = getName({VarName, "ref"}); 10548 if (!CGM.GetGlobalValue(RefName)) { 10549 llvm::Constant *AddrRef = 10550 getOrCreateInternalVariable(Addr->getType(), RefName); 10551 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10552 GVAddrRef->setConstant(/*Val=*/true); 10553 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10554 GVAddrRef->setInitializer(Addr); 10555 CGM.addCompilerUsedGlobal(GVAddrRef); 10556 } 10557 } 10558 } else { 10559 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10560 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10561 HasRequiresUnifiedSharedMemory)) && 10562 "Declare target attribute must link or to with unified memory."); 10563 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10564 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10565 else 10566 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10567 10568 if (CGM.getLangOpts().OpenMPIsDevice) { 10569 VarName = Addr->getName(); 10570 Addr = nullptr; 10571 } else { 10572 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10573 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10574 } 10575 VarSize = CGM.getPointerSize(); 10576 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10577 } 10578 10579 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10580 VarName, Addr, VarSize, Flags, Linkage); 10581 } 10582 10583 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10584 if (isa<FunctionDecl>(GD.getDecl()) || 10585 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10586 return emitTargetFunctions(GD); 10587 10588 return emitTargetGlobalVariable(GD); 10589 } 10590 10591 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10592 for (const VarDecl *VD : DeferredGlobalVariables) { 10593 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10594 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10595 if (!Res) 
10596 continue; 10597 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10598 !HasRequiresUnifiedSharedMemory) { 10599 CGM.EmitGlobal(VD); 10600 } else { 10601 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10602 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10603 HasRequiresUnifiedSharedMemory)) && 10604 "Expected link clause or to clause with unified memory."); 10605 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10606 } 10607 } 10608 } 10609 10610 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10611 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10612 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10613 " Expected target-based directive."); 10614 } 10615 10616 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10617 for (const OMPClause *Clause : D->clauselists()) { 10618 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10619 HasRequiresUnifiedSharedMemory = true; 10620 } else if (const auto *AC = 10621 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10622 switch (AC->getAtomicDefaultMemOrderKind()) { 10623 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10624 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10625 break; 10626 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10627 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10628 break; 10629 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10630 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10631 break; 10632 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10633 break; 10634 } 10635 } 10636 } 10637 } 10638 10639 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10640 return RequiresAtomicOrdering; 10641 } 10642 10643 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10644 LangAS &AS) { 10645 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10646 return false; 10647 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10648 
switch(A->getAllocatorType()) { 10649 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10650 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10651 // Not supported, fallback to the default mem space. 10652 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10653 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10654 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10655 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10656 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10657 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10658 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10659 AS = LangAS::Default; 10660 return true; 10661 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10662 llvm_unreachable("Expected predefined allocator for the variables with the " 10663 "static storage."); 10664 } 10665 return false; 10666 } 10667 10668 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10669 return HasRequiresUnifiedSharedMemory; 10670 } 10671 10672 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10673 CodeGenModule &CGM) 10674 : CGM(CGM) { 10675 if (CGM.getLangOpts().OpenMPIsDevice) { 10676 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10677 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10678 } 10679 } 10680 10681 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10682 if (CGM.getLangOpts().OpenMPIsDevice) 10683 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10684 } 10685 10686 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10687 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10688 return true; 10689 10690 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10691 // Do not to emit function if it is marked as declare target as it was already 10692 // emitted. 
10693 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10694 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10695 if (auto *F = dyn_cast_or_null<llvm::Function>( 10696 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10697 return !F->isDeclaration(); 10698 return false; 10699 } 10700 return true; 10701 } 10702 10703 return !AlreadyEmittedTargetDecls.insert(D).second; 10704 } 10705 10706 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10707 // If we don't have entries or if we are emitting code for the device, we 10708 // don't need to do anything. 10709 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10710 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10711 (OffloadEntriesInfoManager.empty() && 10712 !HasEmittedDeclareTargetRegion && 10713 !HasEmittedTargetRegion)) 10714 return nullptr; 10715 10716 // Create and register the function that handles the requires directives. 10717 ASTContext &C = CGM.getContext(); 10718 10719 llvm::Function *RequiresRegFn; 10720 { 10721 CodeGenFunction CGF(CGM); 10722 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10723 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10724 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10725 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10726 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10727 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10728 // TODO: check for other requires clauses. 10729 // The requires directive takes effect only when a target region is 10730 // present in the compilation unit. Otherwise it is ignored and not 10731 // passed to the runtime. This avoids the runtime from throwing an error 10732 // for mismatching requires clauses across compilation units that don't 10733 // contain at least 1 target region. 
10734 assert((HasEmittedTargetRegion || 10735 HasEmittedDeclareTargetRegion || 10736 !OffloadEntriesInfoManager.empty()) && 10737 "Target or declare target region expected."); 10738 if (HasRequiresUnifiedSharedMemory) 10739 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10740 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10741 CGM.getModule(), OMPRTL___tgt_register_requires), 10742 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10743 CGF.FinishFunction(); 10744 } 10745 return RequiresRegFn; 10746 } 10747 10748 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10749 const OMPExecutableDirective &D, 10750 SourceLocation Loc, 10751 llvm::Function *OutlinedFn, 10752 ArrayRef<llvm::Value *> CapturedVars) { 10753 if (!CGF.HaveInsertPoint()) 10754 return; 10755 10756 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10757 CodeGenFunction::RunCleanupsScope Scope(CGF); 10758 10759 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10760 llvm::Value *Args[] = { 10761 RTLoc, 10762 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10763 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10764 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10765 RealArgs.append(std::begin(Args), std::end(Args)); 10766 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10767 10768 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10769 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10770 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10771 } 10772 10773 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10774 const Expr *NumTeams, 10775 const Expr *ThreadLimit, 10776 SourceLocation Loc) { 10777 if (!CGF.HaveInsertPoint()) 10778 return; 10779 10780 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10781 10782 llvm::Value *NumTeamsVal = 10783 NumTeams 10784 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10785 CGF.CGM.Int32Ty, /* isSigned = */ true) 10786 : CGF.Builder.getInt32(0); 10787 10788 llvm::Value *ThreadLimitVal = 10789 ThreadLimit 10790 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10791 CGF.CGM.Int32Ty, /* isSigned = */ true) 10792 : CGF.Builder.getInt32(0); 10793 10794 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10795 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10796 ThreadLimitVal}; 10797 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10798 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10799 PushNumTeamsArgs); 10800 } 10801 10802 void CGOpenMPRuntime::emitTargetDataCalls( 10803 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10804 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10805 if (!CGF.HaveInsertPoint()) 10806 return; 10807 10808 // Action used to replace the default codegen action and turn privatization 10809 // off. 10810 PrePostActionTy NoPrivAction; 10811 10812 // Generate the code for the opening of the data environment. Capture all the 10813 // arguments of the runtime call by reference because they are used in the 10814 // closing of the region. 10815 auto &&BeginThenGen = [this, &D, Device, &Info, 10816 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10817 // Fill up the arrays with all the mapped variables. 10818 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10819 10820 // Get map clause information. 10821 MappableExprsHandler MEHandler(D, CGF); 10822 MEHandler.generateAllInfo(CombinedInfo); 10823 10824 // Fill up the arrays and create the arguments. 
10825 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10826 /*IsNonContiguous=*/true); 10827 10828 llvm::Value *BasePointersArrayArg = nullptr; 10829 llvm::Value *PointersArrayArg = nullptr; 10830 llvm::Value *SizesArrayArg = nullptr; 10831 llvm::Value *MapTypesArrayArg = nullptr; 10832 llvm::Value *MapNamesArrayArg = nullptr; 10833 llvm::Value *MappersArrayArg = nullptr; 10834 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10835 SizesArrayArg, MapTypesArrayArg, 10836 MapNamesArrayArg, MappersArrayArg, Info); 10837 10838 // Emit device ID if any. 10839 llvm::Value *DeviceID = nullptr; 10840 if (Device) { 10841 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10842 CGF.Int64Ty, /*isSigned=*/true); 10843 } else { 10844 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10845 } 10846 10847 // Emit the number of elements in the offloading arrays. 10848 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10849 // 10850 // Source location for the ident struct 10851 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10852 10853 llvm::Value *OffloadingArgs[] = {RTLoc, 10854 DeviceID, 10855 PointerNum, 10856 BasePointersArrayArg, 10857 PointersArrayArg, 10858 SizesArrayArg, 10859 MapTypesArrayArg, 10860 MapNamesArrayArg, 10861 MappersArrayArg}; 10862 CGF.EmitRuntimeCall( 10863 OMPBuilder.getOrCreateRuntimeFunction( 10864 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10865 OffloadingArgs); 10866 10867 // If device pointer privatization is required, emit the body of the region 10868 // here. It will have to be duplicated: with and without privatization. 10869 if (!Info.CaptureDeviceAddrMap.empty()) 10870 CodeGen(CGF); 10871 }; 10872 10873 // Generate code for the closing of the data region. 
10874 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10875 PrePostActionTy &) { 10876 assert(Info.isValid() && "Invalid data environment closing arguments."); 10877 10878 llvm::Value *BasePointersArrayArg = nullptr; 10879 llvm::Value *PointersArrayArg = nullptr; 10880 llvm::Value *SizesArrayArg = nullptr; 10881 llvm::Value *MapTypesArrayArg = nullptr; 10882 llvm::Value *MapNamesArrayArg = nullptr; 10883 llvm::Value *MappersArrayArg = nullptr; 10884 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10885 SizesArrayArg, MapTypesArrayArg, 10886 MapNamesArrayArg, MappersArrayArg, Info, 10887 {/*ForEndCall=*/true}); 10888 10889 // Emit device ID if any. 10890 llvm::Value *DeviceID = nullptr; 10891 if (Device) { 10892 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10893 CGF.Int64Ty, /*isSigned=*/true); 10894 } else { 10895 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10896 } 10897 10898 // Emit the number of elements in the offloading arrays. 10899 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10900 10901 // Source location for the ident struct 10902 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10903 10904 llvm::Value *OffloadingArgs[] = {RTLoc, 10905 DeviceID, 10906 PointerNum, 10907 BasePointersArrayArg, 10908 PointersArrayArg, 10909 SizesArrayArg, 10910 MapTypesArrayArg, 10911 MapNamesArrayArg, 10912 MappersArrayArg}; 10913 CGF.EmitRuntimeCall( 10914 OMPBuilder.getOrCreateRuntimeFunction( 10915 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10916 OffloadingArgs); 10917 }; 10918 10919 // If we need device pointer privatization, we need to emit the body of the 10920 // region with no privatization in the 'else' branch of the conditional. 10921 // Otherwise, we don't have to do anything. 
10922 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10923 PrePostActionTy &) { 10924 if (!Info.CaptureDeviceAddrMap.empty()) { 10925 CodeGen.setAction(NoPrivAction); 10926 CodeGen(CGF); 10927 } 10928 }; 10929 10930 // We don't have to do anything to close the region if the if clause evaluates 10931 // to false. 10932 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10933 10934 if (IfCond) { 10935 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10936 } else { 10937 RegionCodeGenTy RCG(BeginThenGen); 10938 RCG(CGF); 10939 } 10940 10941 // If we don't require privatization of device pointers, we emit the body in 10942 // between the runtime calls. This avoids duplicating the body code. 10943 if (Info.CaptureDeviceAddrMap.empty()) { 10944 CodeGen.setAction(NoPrivAction); 10945 CodeGen(CGF); 10946 } 10947 10948 if (IfCond) { 10949 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10950 } else { 10951 RegionCodeGenTy RCG(EndThenGen); 10952 RCG(CGF); 10953 } 10954 } 10955 10956 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10957 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10958 const Expr *Device) { 10959 if (!CGF.HaveInsertPoint()) 10960 return; 10961 10962 assert((isa<OMPTargetEnterDataDirective>(D) || 10963 isa<OMPTargetExitDataDirective>(D) || 10964 isa<OMPTargetUpdateDirective>(D)) && 10965 "Expecting either target enter, exit data, or update directives."); 10966 10967 CodeGenFunction::OMPTargetDataInfo InputInfo; 10968 llvm::Value *MapTypesArray = nullptr; 10969 llvm::Value *MapNamesArray = nullptr; 10970 // Generate the code for the opening of the data environment. 10971 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10972 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10973 // Emit device ID if any. 
10974 llvm::Value *DeviceID = nullptr; 10975 if (Device) { 10976 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10977 CGF.Int64Ty, /*isSigned=*/true); 10978 } else { 10979 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10980 } 10981 10982 // Emit the number of elements in the offloading arrays. 10983 llvm::Constant *PointerNum = 10984 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10985 10986 // Source location for the ident struct 10987 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10988 10989 llvm::Value *OffloadingArgs[] = {RTLoc, 10990 DeviceID, 10991 PointerNum, 10992 InputInfo.BasePointersArray.getPointer(), 10993 InputInfo.PointersArray.getPointer(), 10994 InputInfo.SizesArray.getPointer(), 10995 MapTypesArray, 10996 MapNamesArray, 10997 InputInfo.MappersArray.getPointer()}; 10998 10999 // Select the right runtime function call for each standalone 11000 // directive. 11001 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11002 RuntimeFunction RTLFn; 11003 switch (D.getDirectiveKind()) { 11004 case OMPD_target_enter_data: 11005 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11006 : OMPRTL___tgt_target_data_begin_mapper; 11007 break; 11008 case OMPD_target_exit_data: 11009 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11010 : OMPRTL___tgt_target_data_end_mapper; 11011 break; 11012 case OMPD_target_update: 11013 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11014 : OMPRTL___tgt_target_data_update_mapper; 11015 break; 11016 case OMPD_parallel: 11017 case OMPD_for: 11018 case OMPD_parallel_for: 11019 case OMPD_parallel_master: 11020 case OMPD_parallel_sections: 11021 case OMPD_for_simd: 11022 case OMPD_parallel_for_simd: 11023 case OMPD_cancel: 11024 case OMPD_cancellation_point: 11025 case OMPD_ordered: 11026 case OMPD_threadprivate: 11027 case OMPD_allocate: 11028 case OMPD_task: 11029 case OMPD_simd: 11030 case OMPD_tile: 11031 case OMPD_sections: 11032 case OMPD_section: 11033 case OMPD_single: 11034 case OMPD_master: 11035 case OMPD_critical: 11036 case OMPD_taskyield: 11037 case OMPD_barrier: 11038 case OMPD_taskwait: 11039 case OMPD_taskgroup: 11040 case OMPD_atomic: 11041 case OMPD_flush: 11042 case OMPD_depobj: 11043 case OMPD_scan: 11044 case OMPD_teams: 11045 case OMPD_target_data: 11046 case OMPD_distribute: 11047 case OMPD_distribute_simd: 11048 case OMPD_distribute_parallel_for: 11049 case OMPD_distribute_parallel_for_simd: 11050 case OMPD_teams_distribute: 11051 case OMPD_teams_distribute_simd: 11052 case OMPD_teams_distribute_parallel_for: 11053 case OMPD_teams_distribute_parallel_for_simd: 11054 case OMPD_declare_simd: 11055 case OMPD_declare_variant: 11056 case OMPD_begin_declare_variant: 11057 case OMPD_end_declare_variant: 11058 case OMPD_declare_target: 11059 case OMPD_end_declare_target: 11060 case OMPD_declare_reduction: 11061 case OMPD_declare_mapper: 11062 case OMPD_taskloop: 11063 case OMPD_taskloop_simd: 11064 case OMPD_master_taskloop: 11065 case OMPD_master_taskloop_simd: 11066 case OMPD_parallel_master_taskloop: 11067 case OMPD_parallel_master_taskloop_simd: 11068 case OMPD_target: 11069 case OMPD_target_simd: 11070 case OMPD_target_teams_distribute: 11071 case OMPD_target_teams_distribute_simd: 11072 case OMPD_target_teams_distribute_parallel_for: 11073 case OMPD_target_teams_distribute_parallel_for_simd: 11074 case OMPD_target_teams: 11075 
case OMPD_target_parallel: 11076 case OMPD_target_parallel_for: 11077 case OMPD_target_parallel_for_simd: 11078 case OMPD_requires: 11079 case OMPD_unknown: 11080 default: 11081 llvm_unreachable("Unexpected standalone target data directive."); 11082 break; 11083 } 11084 CGF.EmitRuntimeCall( 11085 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11086 OffloadingArgs); 11087 }; 11088 11089 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11090 &MapNamesArray](CodeGenFunction &CGF, 11091 PrePostActionTy &) { 11092 // Fill up the arrays with all the mapped variables. 11093 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11094 11095 // Get map clause information. 11096 MappableExprsHandler MEHandler(D, CGF); 11097 MEHandler.generateAllInfo(CombinedInfo); 11098 11099 TargetDataInfo Info; 11100 // Fill up the arrays and create the arguments. 11101 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11102 /*IsNonContiguous=*/true); 11103 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11104 D.hasClausesOfKind<OMPNowaitClause>(); 11105 emitOffloadingArraysArgument( 11106 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11107 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11108 {/*ForEndTask=*/false}); 11109 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11110 InputInfo.BasePointersArray = 11111 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11112 InputInfo.PointersArray = 11113 Address(Info.PointersArray, CGM.getPointerAlign()); 11114 InputInfo.SizesArray = 11115 Address(Info.SizesArray, CGM.getPointerAlign()); 11116 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11117 MapTypesArray = Info.MapTypesArray; 11118 MapNamesArray = Info.MapNamesArray; 11119 if (RequiresOuterTask) 11120 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11121 else 11122 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11123 }; 11124 11125 
if (IfCond) { 11126 emitIfClause(CGF, IfCond, TargetThenGen, 11127 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11128 } else { 11129 RegionCodeGenTy ThenRCG(TargetThenGen); 11130 ThenRCG(CGF); 11131 } 11132 } 11133 11134 namespace { 11135 /// Kind of parameter in a function with 'declare simd' directive. 11136 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11137 /// Attribute set of the parameter. 11138 struct ParamAttrTy { 11139 ParamKindTy Kind = Vector; 11140 llvm::APSInt StrideOrArg; 11141 llvm::APSInt Alignment; 11142 }; 11143 } // namespace 11144 11145 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11146 ArrayRef<ParamAttrTy> ParamAttrs) { 11147 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11148 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11149 // of that clause. The VLEN value must be power of 2. 11150 // In other case the notion of the function`s "characteristic data type" (CDT) 11151 // is used to compute the vector length. 11152 // CDT is defined in the following order: 11153 // a) For non-void function, the CDT is the return type. 11154 // b) If the function has any non-uniform, non-linear parameters, then the 11155 // CDT is the type of the first such parameter. 11156 // c) If the CDT determined by a) or b) above is struct, union, or class 11157 // type which is pass-by-value (except for the type that maps to the 11158 // built-in complex data type), the characteristic data type is int. 11159 // d) If none of the above three cases is applicable, the CDT is int. 11160 // The VLEN is then determined based on the CDT and the size of vector 11161 // register of that ISA for which current vector version is generated. The 11162 // VLEN is computed using the formula below: 11163 // VLEN = sizeof(vector_register) / sizeof(CDT), 11164 // where vector register size specified in section 3.2.1 Registers and the 11165 // Stack Frame of original AMD64 ABI document. 
11166 QualType RetType = FD->getReturnType(); 11167 if (RetType.isNull()) 11168 return 0; 11169 ASTContext &C = FD->getASTContext(); 11170 QualType CDT; 11171 if (!RetType.isNull() && !RetType->isVoidType()) { 11172 CDT = RetType; 11173 } else { 11174 unsigned Offset = 0; 11175 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11176 if (ParamAttrs[Offset].Kind == Vector) 11177 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11178 ++Offset; 11179 } 11180 if (CDT.isNull()) { 11181 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11182 if (ParamAttrs[I + Offset].Kind == Vector) { 11183 CDT = FD->getParamDecl(I)->getType(); 11184 break; 11185 } 11186 } 11187 } 11188 } 11189 if (CDT.isNull()) 11190 CDT = C.IntTy; 11191 CDT = CDT->getCanonicalTypeUnqualified(); 11192 if (CDT->isRecordType() || CDT->isUnionType()) 11193 CDT = C.IntTy; 11194 return C.getTypeSize(CDT); 11195 } 11196 11197 static void 11198 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11199 const llvm::APSInt &VLENVal, 11200 ArrayRef<ParamAttrTy> ParamAttrs, 11201 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11202 struct ISADataTy { 11203 char ISA; 11204 unsigned VecRegSize; 11205 }; 11206 ISADataTy ISAData[] = { 11207 { 11208 'b', 128 11209 }, // SSE 11210 { 11211 'c', 256 11212 }, // AVX 11213 { 11214 'd', 256 11215 }, // AVX2 11216 { 11217 'e', 512 11218 }, // AVX512 11219 }; 11220 llvm::SmallVector<char, 2> Masked; 11221 switch (State) { 11222 case OMPDeclareSimdDeclAttr::BS_Undefined: 11223 Masked.push_back('N'); 11224 Masked.push_back('M'); 11225 break; 11226 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11227 Masked.push_back('N'); 11228 break; 11229 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11230 Masked.push_back('M'); 11231 break; 11232 } 11233 for (char Mask : Masked) { 11234 for (const ISADataTy &Data : ISAData) { 11235 SmallString<256> Buffer; 11236 llvm::raw_svector_ostream Out(Buffer); 11237 Out << "_ZGV" << Data.ISA << Mask; 11238 if (!VLENVal) 
{ 11239 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11240 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11241 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11242 } else { 11243 Out << VLENVal; 11244 } 11245 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11246 switch (ParamAttr.Kind){ 11247 case LinearWithVarStride: 11248 Out << 's' << ParamAttr.StrideOrArg; 11249 break; 11250 case Linear: 11251 Out << 'l'; 11252 if (ParamAttr.StrideOrArg != 1) 11253 Out << ParamAttr.StrideOrArg; 11254 break; 11255 case Uniform: 11256 Out << 'u'; 11257 break; 11258 case Vector: 11259 Out << 'v'; 11260 break; 11261 } 11262 if (!!ParamAttr.Alignment) 11263 Out << 'a' << ParamAttr.Alignment; 11264 } 11265 Out << '_' << Fn->getName(); 11266 Fn->addFnAttr(Out.str()); 11267 } 11268 } 11269 } 11270 11271 // This are the Functions that are needed to mangle the name of the 11272 // vector functions generated by the compiler, according to the rules 11273 // defined in the "Vector Function ABI specifications for AArch64", 11274 // available at 11275 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11276 11277 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11278 /// 11279 /// TODO: Need to implement the behavior for reference marked with a 11280 /// var or no linear modifiers (1.b in the section). For this, we 11281 /// need to extend ParamKindTy to support the linear modifiers. 
11282 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11283 QT = QT.getCanonicalType(); 11284 11285 if (QT->isVoidType()) 11286 return false; 11287 11288 if (Kind == ParamKindTy::Uniform) 11289 return false; 11290 11291 if (Kind == ParamKindTy::Linear) 11292 return false; 11293 11294 // TODO: Handle linear references with modifiers 11295 11296 if (Kind == ParamKindTy::LinearWithVarStride) 11297 return false; 11298 11299 return true; 11300 } 11301 11302 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11303 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11304 QT = QT.getCanonicalType(); 11305 unsigned Size = C.getTypeSize(QT); 11306 11307 // Only scalars and complex within 16 bytes wide set PVB to true. 11308 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11309 return false; 11310 11311 if (QT->isFloatingType()) 11312 return true; 11313 11314 if (QT->isIntegerType()) 11315 return true; 11316 11317 if (QT->isPointerType()) 11318 return true; 11319 11320 // TODO: Add support for complex types (section 3.1.2, item 2). 11321 11322 return false; 11323 } 11324 11325 /// Computes the lane size (LS) of a return type or of an input parameter, 11326 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11327 /// TODO: Add support for references, section 3.2.1, item 1. 11328 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11329 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11330 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11331 if (getAArch64PBV(PTy, C)) 11332 return C.getTypeSize(PTy); 11333 } 11334 if (getAArch64PBV(QT, C)) 11335 return C.getTypeSize(QT); 11336 11337 return C.getTypeSize(C.getUIntPtrType()); 11338 } 11339 11340 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11341 // signature of the scalar function, as defined in 3.2.2 of the 11342 // AAVFABI. 
11343 static std::tuple<unsigned, unsigned, bool> 11344 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11345 QualType RetType = FD->getReturnType().getCanonicalType(); 11346 11347 ASTContext &C = FD->getASTContext(); 11348 11349 bool OutputBecomesInput = false; 11350 11351 llvm::SmallVector<unsigned, 8> Sizes; 11352 if (!RetType->isVoidType()) { 11353 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11354 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11355 OutputBecomesInput = true; 11356 } 11357 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11358 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11359 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11360 } 11361 11362 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11363 // The LS of a function parameter / return value can only be a power 11364 // of 2, starting from 8 bits, up to 128. 11365 assert(std::all_of(Sizes.begin(), Sizes.end(), 11366 [](unsigned Size) { 11367 return Size == 8 || Size == 16 || Size == 32 || 11368 Size == 64 || Size == 128; 11369 }) && 11370 "Invalid size"); 11371 11372 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11373 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11374 OutputBecomesInput); 11375 } 11376 11377 /// Mangle the parameter part of the vector function name according to 11378 /// their OpenMP classification. The mangling function is defined in 11379 /// section 3.5 of the AAVFABI. 11380 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11381 SmallString<256> Buffer; 11382 llvm::raw_svector_ostream Out(Buffer); 11383 for (const auto &ParamAttr : ParamAttrs) { 11384 switch (ParamAttr.Kind) { 11385 case LinearWithVarStride: 11386 Out << "ls" << ParamAttr.StrideOrArg; 11387 break; 11388 case Linear: 11389 Out << 'l'; 11390 // Don't print the step value if it is not present or if it is 11391 // equal to 1. 
11392 if (ParamAttr.StrideOrArg != 1) 11393 Out << ParamAttr.StrideOrArg; 11394 break; 11395 case Uniform: 11396 Out << 'u'; 11397 break; 11398 case Vector: 11399 Out << 'v'; 11400 break; 11401 } 11402 11403 if (!!ParamAttr.Alignment) 11404 Out << 'a' << ParamAttr.Alignment; 11405 } 11406 11407 return std::string(Out.str()); 11408 } 11409 11410 // Function used to add the attribute. The parameter `VLEN` is 11411 // templated to allow the use of "x" when targeting scalable functions 11412 // for SVE. 11413 template <typename T> 11414 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11415 char ISA, StringRef ParSeq, 11416 StringRef MangledName, bool OutputBecomesInput, 11417 llvm::Function *Fn) { 11418 SmallString<256> Buffer; 11419 llvm::raw_svector_ostream Out(Buffer); 11420 Out << Prefix << ISA << LMask << VLEN; 11421 if (OutputBecomesInput) 11422 Out << "v"; 11423 Out << ParSeq << "_" << MangledName; 11424 Fn->addFnAttr(Out.str()); 11425 } 11426 11427 // Helper function to generate the Advanced SIMD names depending on 11428 // the value of the NDS when simdlen is not present. 
11429 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11430 StringRef Prefix, char ISA, 11431 StringRef ParSeq, StringRef MangledName, 11432 bool OutputBecomesInput, 11433 llvm::Function *Fn) { 11434 switch (NDS) { 11435 case 8: 11436 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11437 OutputBecomesInput, Fn); 11438 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11439 OutputBecomesInput, Fn); 11440 break; 11441 case 16: 11442 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11443 OutputBecomesInput, Fn); 11444 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11445 OutputBecomesInput, Fn); 11446 break; 11447 case 32: 11448 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11449 OutputBecomesInput, Fn); 11450 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11451 OutputBecomesInput, Fn); 11452 break; 11453 case 64: 11454 case 128: 11455 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11456 OutputBecomesInput, Fn); 11457 break; 11458 default: 11459 llvm_unreachable("Scalar type is too wide."); 11460 } 11461 } 11462 11463 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11464 static void emitAArch64DeclareSimdFunction( 11465 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11466 ArrayRef<ParamAttrTy> ParamAttrs, 11467 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11468 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11469 11470 // Get basic data for building the vector signature. 11471 const auto Data = getNDSWDS(FD, ParamAttrs); 11472 const unsigned NDS = std::get<0>(Data); 11473 const unsigned WDS = std::get<1>(Data); 11474 const bool OutputBecomesInput = std::get<2>(Data); 11475 11476 // Check the values provided via `simdlen` by the user. 11477 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11478 if (UserVLEN == 1) { 11479 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11480 DiagnosticsEngine::Warning, 11481 "The clause simdlen(1) has no effect when targeting aarch64."); 11482 CGM.getDiags().Report(SLoc, DiagID); 11483 return; 11484 } 11485 11486 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11487 // Advanced SIMD output. 11488 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11489 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11490 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11491 "power of 2 when targeting Advanced SIMD."); 11492 CGM.getDiags().Report(SLoc, DiagID); 11493 return; 11494 } 11495 11496 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11497 // limits. 11498 if (ISA == 's' && UserVLEN != 0) { 11499 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11500 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11501 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11502 "lanes in the architectural constraints " 11503 "for SVE (min is 128-bit, max is " 11504 "2048-bit, by steps of 128-bit)"); 11505 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11506 return; 11507 } 11508 } 11509 11510 // Sort out parameter sequence. 11511 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11512 StringRef Prefix = "_ZGV"; 11513 // Generate simdlen from user input (if any). 11514 if (UserVLEN) { 11515 if (ISA == 's') { 11516 // SVE generates only a masked function. 11517 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11518 OutputBecomesInput, Fn); 11519 } else { 11520 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11521 // Advanced SIMD generates one or two functions, depending on 11522 // the `[not]inbranch` clause. 
11523 switch (State) { 11524 case OMPDeclareSimdDeclAttr::BS_Undefined: 11525 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11526 OutputBecomesInput, Fn); 11527 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11528 OutputBecomesInput, Fn); 11529 break; 11530 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11531 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11532 OutputBecomesInput, Fn); 11533 break; 11534 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11535 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11536 OutputBecomesInput, Fn); 11537 break; 11538 } 11539 } 11540 } else { 11541 // If no user simdlen is provided, follow the AAVFABI rules for 11542 // generating the vector length. 11543 if (ISA == 's') { 11544 // SVE, section 3.4.1, item 1. 11545 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11546 OutputBecomesInput, Fn); 11547 } else { 11548 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11549 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11550 // two vector names depending on the use of the clause 11551 // `[not]inbranch`. 
11552 switch (State) { 11553 case OMPDeclareSimdDeclAttr::BS_Undefined: 11554 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11555 OutputBecomesInput, Fn); 11556 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11557 OutputBecomesInput, Fn); 11558 break; 11559 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11560 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11561 OutputBecomesInput, Fn); 11562 break; 11563 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11564 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11565 OutputBecomesInput, Fn); 11566 break; 11567 } 11568 } 11569 } 11570 } 11571 11572 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11573 llvm::Function *Fn) { 11574 ASTContext &C = CGM.getContext(); 11575 FD = FD->getMostRecentDecl(); 11576 // Map params to their positions in function decl. 11577 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11578 if (isa<CXXMethodDecl>(FD)) 11579 ParamPositions.try_emplace(FD, 0); 11580 unsigned ParamPos = ParamPositions.size(); 11581 for (const ParmVarDecl *P : FD->parameters()) { 11582 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11583 ++ParamPos; 11584 } 11585 while (FD) { 11586 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11587 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11588 // Mark uniform parameters. 11589 for (const Expr *E : Attr->uniforms()) { 11590 E = E->IgnoreParenImpCasts(); 11591 unsigned Pos; 11592 if (isa<CXXThisExpr>(E)) { 11593 Pos = ParamPositions[FD]; 11594 } else { 11595 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11596 ->getCanonicalDecl(); 11597 Pos = ParamPositions[PVD]; 11598 } 11599 ParamAttrs[Pos].Kind = Uniform; 11600 } 11601 // Get alignment info. 
11602 auto NI = Attr->alignments_begin(); 11603 for (const Expr *E : Attr->aligneds()) { 11604 E = E->IgnoreParenImpCasts(); 11605 unsigned Pos; 11606 QualType ParmTy; 11607 if (isa<CXXThisExpr>(E)) { 11608 Pos = ParamPositions[FD]; 11609 ParmTy = E->getType(); 11610 } else { 11611 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11612 ->getCanonicalDecl(); 11613 Pos = ParamPositions[PVD]; 11614 ParmTy = PVD->getType(); 11615 } 11616 ParamAttrs[Pos].Alignment = 11617 (*NI) 11618 ? (*NI)->EvaluateKnownConstInt(C) 11619 : llvm::APSInt::getUnsigned( 11620 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11621 .getQuantity()); 11622 ++NI; 11623 } 11624 // Mark linear parameters. 11625 auto SI = Attr->steps_begin(); 11626 auto MI = Attr->modifiers_begin(); 11627 for (const Expr *E : Attr->linears()) { 11628 E = E->IgnoreParenImpCasts(); 11629 unsigned Pos; 11630 // Rescaling factor needed to compute the linear parameter 11631 // value in the mangled name. 11632 unsigned PtrRescalingFactor = 1; 11633 if (isa<CXXThisExpr>(E)) { 11634 Pos = ParamPositions[FD]; 11635 } else { 11636 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11637 ->getCanonicalDecl(); 11638 Pos = ParamPositions[PVD]; 11639 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11640 PtrRescalingFactor = CGM.getContext() 11641 .getTypeSizeInChars(P->getPointeeType()) 11642 .getQuantity(); 11643 } 11644 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11645 ParamAttr.Kind = Linear; 11646 // Assuming a stride of 1, for `linear` without modifiers. 
11647 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11648 if (*SI) { 11649 Expr::EvalResult Result; 11650 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11651 if (const auto *DRE = 11652 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11653 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11654 ParamAttr.Kind = LinearWithVarStride; 11655 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11656 ParamPositions[StridePVD->getCanonicalDecl()]); 11657 } 11658 } 11659 } else { 11660 ParamAttr.StrideOrArg = Result.Val.getInt(); 11661 } 11662 } 11663 // If we are using a linear clause on a pointer, we need to 11664 // rescale the value of linear_step with the byte size of the 11665 // pointee type. 11666 if (Linear == ParamAttr.Kind) 11667 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11668 ++SI; 11669 ++MI; 11670 } 11671 llvm::APSInt VLENVal; 11672 SourceLocation ExprLoc; 11673 const Expr *VLENExpr = Attr->getSimdlen(); 11674 if (VLENExpr) { 11675 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11676 ExprLoc = VLENExpr->getExprLoc(); 11677 } 11678 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11679 if (CGM.getTriple().isX86()) { 11680 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11681 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11682 unsigned VLEN = VLENVal.getExtValue(); 11683 StringRef MangledName = Fn->getName(); 11684 if (CGM.getTarget().hasFeature("sve")) 11685 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11686 MangledName, 's', 128, Fn, ExprLoc); 11687 if (CGM.getTarget().hasFeature("neon")) 11688 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11689 MangledName, 'n', 128, Fn, ExprLoc); 11690 } 11691 } 11692 FD = FD->getPreviousDecl(); 11693 } 11694 } 11695 11696 namespace { 11697 /// Cleanup action for doacross support. 
11698 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11699 public: 11700 static const int DoacrossFinArgs = 2; 11701 11702 private: 11703 llvm::FunctionCallee RTLFn; 11704 llvm::Value *Args[DoacrossFinArgs]; 11705 11706 public: 11707 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11708 ArrayRef<llvm::Value *> CallArgs) 11709 : RTLFn(RTLFn) { 11710 assert(CallArgs.size() == DoacrossFinArgs); 11711 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11712 } 11713 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11714 if (!CGF.HaveInsertPoint()) 11715 return; 11716 CGF.EmitRuntimeCall(RTLFn, Args); 11717 } 11718 }; 11719 } // namespace 11720 11721 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11722 const OMPLoopDirective &D, 11723 ArrayRef<Expr *> NumIterations) { 11724 if (!CGF.HaveInsertPoint()) 11725 return; 11726 11727 ASTContext &C = CGM.getContext(); 11728 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11729 RecordDecl *RD; 11730 if (KmpDimTy.isNull()) { 11731 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11732 // kmp_int64 lo; // lower 11733 // kmp_int64 up; // upper 11734 // kmp_int64 st; // stride 11735 // }; 11736 RD = C.buildImplicitRecord("kmp_dim"); 11737 RD->startDefinition(); 11738 addFieldToRecordDecl(C, RD, Int64Ty); 11739 addFieldToRecordDecl(C, RD, Int64Ty); 11740 addFieldToRecordDecl(C, RD, Int64Ty); 11741 RD->completeDefinition(); 11742 KmpDimTy = C.getRecordType(RD); 11743 } else { 11744 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11745 } 11746 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11747 QualType ArrayTy = 11748 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11749 11750 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11751 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11752 enum { LowerFD = 0, UpperFD, StrideFD }; 11753 // Fill dims with data. 
11754 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11755 LValue DimsLVal = CGF.MakeAddrLValue( 11756 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11757 // dims.upper = num_iterations; 11758 LValue UpperLVal = CGF.EmitLValueForField( 11759 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11760 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11761 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11762 Int64Ty, NumIterations[I]->getExprLoc()); 11763 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11764 // dims.stride = 1; 11765 LValue StrideLVal = CGF.EmitLValueForField( 11766 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11767 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11768 StrideLVal); 11769 } 11770 11771 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11772 // kmp_int32 num_dims, struct kmp_dim * dims); 11773 llvm::Value *Args[] = { 11774 emitUpdateLocation(CGF, D.getBeginLoc()), 11775 getThreadID(CGF, D.getBeginLoc()), 11776 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11777 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11778 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11779 CGM.VoidPtrTy)}; 11780 11781 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11782 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11783 CGF.EmitRuntimeCall(RTLFn, Args); 11784 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11785 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11786 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11787 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11788 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11789 llvm::makeArrayRef(FiniArgs)); 11790 } 11791 11792 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11793 const OMPDependClause *C) { 11794 QualType Int64Ty = 11795 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11796 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11797 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11798 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11799 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11800 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11801 const Expr *CounterVal = C->getLoopData(I); 11802 assert(CounterVal); 11803 llvm::Value *CntVal = CGF.EmitScalarConversion( 11804 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11805 CounterVal->getExprLoc()); 11806 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11807 /*Volatile=*/false, Int64Ty); 11808 } 11809 llvm::Value *Args[] = { 11810 emitUpdateLocation(CGF, C->getBeginLoc()), 11811 getThreadID(CGF, C->getBeginLoc()), 11812 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11813 llvm::FunctionCallee RTLFn; 11814 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11815 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11816 OMPRTL___kmpc_doacross_post); 11817 } else { 11818 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11819 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11820 OMPRTL___kmpc_doacross_wait); 11821 } 11822 CGF.EmitRuntimeCall(RTLFn, Args); 11823 } 11824 11825 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11826 llvm::FunctionCallee Callee, 11827 ArrayRef<llvm::Value *> Args) const { 11828 assert(Loc.isValid() && "Outlined function call location must be valid."); 11829 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11830 11831 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11832 if (Fn->doesNotThrow()) { 11833 CGF.EmitNounwindRuntimeCall(Fn, Args); 11834 return; 11835 } 11836 } 11837 CGF.EmitRuntimeCall(Callee, Args); 11838 } 11839 11840 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11841 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11842 ArrayRef<llvm::Value *> Args) const { 11843 emitCall(CGF, Loc, OutlinedFn, Args); 11844 } 11845 11846 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11847 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11848 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11849 HasEmittedDeclareTargetRegion = true; 11850 } 11851 11852 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11853 const VarDecl *NativeParam, 11854 const VarDecl *TargetParam) const { 11855 return CGF.GetAddrOfLocalVar(NativeParam); 11856 } 11857 11858 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11859 const VarDecl *VD) { 11860 if (!VD) 11861 return Address::invalid(); 11862 Address UntiedAddr = Address::invalid(); 11863 Address UntiedRealAddr = Address::invalid(); 11864 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11865 if (It != FunctionToUntiedTaskStackMap.end()) { 11866 const UntiedLocalVarsAddressesMap &UntiedData = 11867 UntiedLocalVarsStack[It->second]; 11868 auto I = UntiedData.find(VD); 11869 if (I != UntiedData.end()) { 11870 UntiedAddr = I->second.first; 11871 UntiedRealAddr = I->second.second; 11872 } 11873 } 11874 const VarDecl *CVD = VD->getCanonicalDecl(); 11875 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11876 // Use the default allocation. 
11877 if (!isAllocatableDecl(VD)) 11878 return UntiedAddr; 11879 llvm::Value *Size; 11880 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11881 if (CVD->getType()->isVariablyModifiedType()) { 11882 Size = CGF.getTypeSize(CVD->getType()); 11883 // Align the size: ((size + align - 1) / align) * align 11884 Size = CGF.Builder.CreateNUWAdd( 11885 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11886 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11887 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11888 } else { 11889 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11890 Size = CGM.getSize(Sz.alignTo(Align)); 11891 } 11892 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11893 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11894 assert(AA->getAllocator() && 11895 "Expected allocator expression for non-default allocator."); 11896 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11897 // According to the standard, the original allocator type is a enum 11898 // (integer). Convert to pointer type, if required. 
11899 Allocator = CGF.EmitScalarConversion( 11900 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 11901 AA->getAllocator()->getExprLoc()); 11902 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11903 11904 llvm::Value *Addr = 11905 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11906 CGM.getModule(), OMPRTL___kmpc_alloc), 11907 Args, getName({CVD->getName(), ".void.addr"})); 11908 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11909 CGM.getModule(), OMPRTL___kmpc_free); 11910 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11911 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11912 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11913 if (UntiedAddr.isValid()) 11914 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11915 11916 // Cleanup action for allocate support. 11917 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11918 llvm::FunctionCallee RTLFn; 11919 unsigned LocEncoding; 11920 Address Addr; 11921 const Expr *Allocator; 11922 11923 public: 11924 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, 11925 Address Addr, const Expr *Allocator) 11926 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11927 Allocator(Allocator) {} 11928 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11929 if (!CGF.HaveInsertPoint()) 11930 return; 11931 llvm::Value *Args[3]; 11932 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 11933 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 11934 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11935 Addr.getPointer(), CGF.VoidPtrTy); 11936 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 11937 // According to the standard, the original allocator type is a enum 11938 // (integer). Convert to pointer type, if required. 
11939 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 11940 CGF.getContext().VoidPtrTy, 11941 Allocator->getExprLoc()); 11942 Args[2] = AllocVal; 11943 11944 CGF.EmitRuntimeCall(RTLFn, Args); 11945 } 11946 }; 11947 Address VDAddr = 11948 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 11949 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11950 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11951 VDAddr, AA->getAllocator()); 11952 if (UntiedRealAddr.isValid()) 11953 if (auto *Region = 11954 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11955 Region->emitUntiedSwitch(CGF); 11956 return VDAddr; 11957 } 11958 return UntiedAddr; 11959 } 11960 11961 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11962 const VarDecl *VD) const { 11963 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11964 if (It == FunctionToUntiedTaskStackMap.end()) 11965 return false; 11966 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11967 } 11968 11969 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11970 CodeGenModule &CGM, const OMPLoopDirective &S) 11971 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11972 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11973 if (!NeedToPush) 11974 return; 11975 NontemporalDeclsSet &DS = 11976 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11977 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11978 for (const Stmt *Ref : C->private_refs()) { 11979 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11980 const ValueDecl *VD; 11981 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11982 VD = DRE->getDecl(); 11983 } else { 11984 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11985 assert((ME->isImplicitCXXThis() || 11986 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11987 "Expected member of current class."); 11988 VD = 
ME->getMemberDecl(); 11989 } 11990 DS.insert(VD); 11991 } 11992 } 11993 } 11994 11995 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11996 if (!NeedToPush) 11997 return; 11998 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11999 } 12000 12001 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12002 CodeGenFunction &CGF, 12003 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 12004 std::pair<Address, Address>> &LocalVars) 12005 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12006 if (!NeedToPush) 12007 return; 12008 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12009 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12010 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12011 } 12012 12013 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12014 if (!NeedToPush) 12015 return; 12016 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12017 } 12018 12019 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12020 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12021 12022 return llvm::any_of( 12023 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12024 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12025 } 12026 12027 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12028 const OMPExecutableDirective &S, 12029 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12030 const { 12031 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12032 // Vars in target/task regions must be excluded completely. 
12033 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12034 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12035 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12036 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12037 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12038 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12039 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12040 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12041 } 12042 } 12043 // Exclude vars in private clauses. 12044 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12045 for (const Expr *Ref : C->varlists()) { 12046 if (!Ref->getType()->isScalarType()) 12047 continue; 12048 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12049 if (!DRE) 12050 continue; 12051 NeedToCheckForLPCs.insert(DRE->getDecl()); 12052 } 12053 } 12054 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12055 for (const Expr *Ref : C->varlists()) { 12056 if (!Ref->getType()->isScalarType()) 12057 continue; 12058 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12059 if (!DRE) 12060 continue; 12061 NeedToCheckForLPCs.insert(DRE->getDecl()); 12062 } 12063 } 12064 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12065 for (const Expr *Ref : C->varlists()) { 12066 if (!Ref->getType()->isScalarType()) 12067 continue; 12068 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12069 if (!DRE) 12070 continue; 12071 NeedToCheckForLPCs.insert(DRE->getDecl()); 12072 } 12073 } 12074 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12075 for (const Expr *Ref : C->varlists()) { 12076 if (!Ref->getType()->isScalarType()) 12077 continue; 12078 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12079 if (!DRE) 12080 continue; 12081 NeedToCheckForLPCs.insert(DRE->getDecl()); 12082 } 12083 } 12084 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12085 for (const Expr *Ref : C->varlists()) { 12086 if (!Ref->getType()->isScalarType()) 12087 continue; 12088 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12089 if (!DRE) 12090 continue; 12091 NeedToCheckForLPCs.insert(DRE->getDecl()); 12092 } 12093 } 12094 for (const Decl *VD : NeedToCheckForLPCs) { 12095 for (const LastprivateConditionalData &Data : 12096 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12097 if (Data.DeclToUniqueName.count(VD) > 0) { 12098 if (!Data.Disabled) 12099 NeedToAddForLPCsAsDisabled.insert(VD); 12100 break; 12101 } 12102 } 12103 } 12104 } 12105 12106 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12107 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12108 : CGM(CGF.CGM), 12109 Action((CGM.getLangOpts().OpenMP >= 50 && 12110 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12111 [](const OMPLastprivateClause *C) { 12112 return C->getKind() == 12113 OMPC_LASTPRIVATE_conditional; 12114 })) 12115 ? 
ActionToDo::PushAsLastprivateConditional 12116 : ActionToDo::DoNotPush) { 12117 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12118 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12119 return; 12120 assert(Action == ActionToDo::PushAsLastprivateConditional && 12121 "Expected a push action."); 12122 LastprivateConditionalData &Data = 12123 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12124 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12125 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12126 continue; 12127 12128 for (const Expr *Ref : C->varlists()) { 12129 Data.DeclToUniqueName.insert(std::make_pair( 12130 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12131 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12132 } 12133 } 12134 Data.IVLVal = IVLVal; 12135 Data.Fn = CGF.CurFn; 12136 } 12137 12138 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12139 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12140 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12141 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12142 if (CGM.getLangOpts().OpenMP < 50) 12143 return; 12144 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12145 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12146 if (!NeedToAddForLPCsAsDisabled.empty()) { 12147 Action = ActionToDo::DisableLastprivateConditional; 12148 LastprivateConditionalData &Data = 12149 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12150 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12151 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12152 Data.Fn = CGF.CurFn; 12153 Data.Disabled = true; 12154 } 12155 } 12156 12157 CGOpenMPRuntime::LastprivateConditionalRAII 12158 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12159 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12160 return 
LastprivateConditionalRAII(CGF, S); 12161 } 12162 12163 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12164 if (CGM.getLangOpts().OpenMP < 50) 12165 return; 12166 if (Action == ActionToDo::DisableLastprivateConditional) { 12167 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12168 "Expected list of disabled private vars."); 12169 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12170 } 12171 if (Action == ActionToDo::PushAsLastprivateConditional) { 12172 assert( 12173 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12174 "Expected list of lastprivate conditional vars."); 12175 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12176 } 12177 } 12178 12179 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12180 const VarDecl *VD) { 12181 ASTContext &C = CGM.getContext(); 12182 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12183 if (I == LastprivateConditionalToTypes.end()) 12184 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12185 QualType NewType; 12186 const FieldDecl *VDField; 12187 const FieldDecl *FiredField; 12188 LValue BaseLVal; 12189 auto VI = I->getSecond().find(VD); 12190 if (VI == I->getSecond().end()) { 12191 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12192 RD->startDefinition(); 12193 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12194 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12195 RD->completeDefinition(); 12196 NewType = C.getRecordType(RD); 12197 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12198 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12199 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12200 } else { 12201 NewType = std::get<0>(VI->getSecond()); 12202 VDField = std::get<1>(VI->getSecond()); 12203 FiredField = std::get<2>(VI->getSecond()); 
12204 BaseLVal = std::get<3>(VI->getSecond()); 12205 } 12206 LValue FiredLVal = 12207 CGF.EmitLValueForField(BaseLVal, FiredField); 12208 CGF.EmitStoreOfScalar( 12209 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12210 FiredLVal); 12211 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12212 } 12213 12214 namespace { 12215 /// Checks if the lastprivate conditional variable is referenced in LHS. 12216 class LastprivateConditionalRefChecker final 12217 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12218 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12219 const Expr *FoundE = nullptr; 12220 const Decl *FoundD = nullptr; 12221 StringRef UniqueDeclName; 12222 LValue IVLVal; 12223 llvm::Function *FoundFn = nullptr; 12224 SourceLocation Loc; 12225 12226 public: 12227 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12228 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12229 llvm::reverse(LPM)) { 12230 auto It = D.DeclToUniqueName.find(E->getDecl()); 12231 if (It == D.DeclToUniqueName.end()) 12232 continue; 12233 if (D.Disabled) 12234 return false; 12235 FoundE = E; 12236 FoundD = E->getDecl()->getCanonicalDecl(); 12237 UniqueDeclName = It->second; 12238 IVLVal = D.IVLVal; 12239 FoundFn = D.Fn; 12240 break; 12241 } 12242 return FoundE == E; 12243 } 12244 bool VisitMemberExpr(const MemberExpr *E) { 12245 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12246 return false; 12247 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12248 llvm::reverse(LPM)) { 12249 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12250 if (It == D.DeclToUniqueName.end()) 12251 continue; 12252 if (D.Disabled) 12253 return false; 12254 FoundE = E; 12255 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12256 UniqueDeclName = It->second; 12257 IVLVal = D.IVLVal; 12258 FoundFn = D.Fn; 12259 break; 12260 } 12261 return FoundE == E; 12262 } 12263 bool VisitStmt(const Stmt *S) { 12264 for (const Stmt 
*Child : S->children()) { 12265 if (!Child) 12266 continue; 12267 if (const auto *E = dyn_cast<Expr>(Child)) 12268 if (!E->isGLValue()) 12269 continue; 12270 if (Visit(Child)) 12271 return true; 12272 } 12273 return false; 12274 } 12275 explicit LastprivateConditionalRefChecker( 12276 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12277 : LPM(LPM) {} 12278 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12279 getFoundData() const { 12280 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12281 } 12282 }; 12283 } // namespace 12284 12285 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12286 LValue IVLVal, 12287 StringRef UniqueDeclName, 12288 LValue LVal, 12289 SourceLocation Loc) { 12290 // Last updated loop counter for the lastprivate conditional var. 12291 // int<xx> last_iv = 0; 12292 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12293 llvm::Constant *LastIV = 12294 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12295 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12296 IVLVal.getAlignment().getAsAlign()); 12297 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12298 12299 // Last value of the lastprivate conditional. 12300 // decltype(priv_a) last_a; 12301 llvm::Constant *Last = getOrCreateInternalVariable( 12302 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12303 cast<llvm::GlobalVariable>(Last)->setAlignment( 12304 LVal.getAlignment().getAsAlign()); 12305 LValue LastLVal = 12306 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12307 12308 // Global loop counter. Required to handle inner parallel-for regions. 
12309 // iv 12310 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12311 12312 // #pragma omp critical(a) 12313 // if (last_iv <= iv) { 12314 // last_iv = iv; 12315 // last_a = priv_a; 12316 // } 12317 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12318 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12319 Action.Enter(CGF); 12320 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12321 // (last_iv <= iv) ? Check if the variable is updated and store new 12322 // value in global var. 12323 llvm::Value *CmpRes; 12324 if (IVLVal.getType()->isSignedIntegerType()) { 12325 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12326 } else { 12327 assert(IVLVal.getType()->isUnsignedIntegerType() && 12328 "Loop iteration variable must be integer."); 12329 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12330 } 12331 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12332 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12333 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12334 // { 12335 CGF.EmitBlock(ThenBB); 12336 12337 // last_iv = iv; 12338 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12339 12340 // last_a = priv_a; 12341 switch (CGF.getEvaluationKind(LVal.getType())) { 12342 case TEK_Scalar: { 12343 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12344 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12345 break; 12346 } 12347 case TEK_Complex: { 12348 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12349 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12350 break; 12351 } 12352 case TEK_Aggregate: 12353 llvm_unreachable( 12354 "Aggregates are not supported in lastprivate conditional."); 12355 } 12356 // } 12357 CGF.EmitBranch(ExitBB); 12358 // There is no need to emit line number for unconditional branch. 
12359 (void)ApplyDebugLocation::CreateEmpty(CGF); 12360 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12361 }; 12362 12363 if (CGM.getLangOpts().OpenMPSimd) { 12364 // Do not emit as a critical region as no parallel region could be emitted. 12365 RegionCodeGenTy ThenRCG(CodeGen); 12366 ThenRCG(CGF); 12367 } else { 12368 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12369 } 12370 } 12371 12372 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12373 const Expr *LHS) { 12374 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12375 return; 12376 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12377 if (!Checker.Visit(LHS)) 12378 return; 12379 const Expr *FoundE; 12380 const Decl *FoundD; 12381 StringRef UniqueDeclName; 12382 LValue IVLVal; 12383 llvm::Function *FoundFn; 12384 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12385 Checker.getFoundData(); 12386 if (FoundFn != CGF.CurFn) { 12387 // Special codegen for inner parallel regions. 
12388 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12389 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12390 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12391 "Lastprivate conditional is not found in outer region."); 12392 QualType StructTy = std::get<0>(It->getSecond()); 12393 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12394 LValue PrivLVal = CGF.EmitLValue(FoundE); 12395 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12396 PrivLVal.getAddress(CGF), 12397 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12398 LValue BaseLVal = 12399 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12400 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12401 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12402 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12403 FiredLVal, llvm::AtomicOrdering::Unordered, 12404 /*IsVolatile=*/true, /*isInit=*/false); 12405 return; 12406 } 12407 12408 // Private address of the lastprivate conditional in the current context. 
12409 // priv_a 12410 LValue LVal = CGF.EmitLValue(FoundE); 12411 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12412 FoundE->getExprLoc()); 12413 } 12414 12415 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12416 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12417 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12418 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12419 return; 12420 auto Range = llvm::reverse(LastprivateConditionalStack); 12421 auto It = llvm::find_if( 12422 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12423 if (It == Range.end() || It->Fn != CGF.CurFn) 12424 return; 12425 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12426 assert(LPCI != LastprivateConditionalToTypes.end() && 12427 "Lastprivates must be registered already."); 12428 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12429 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12430 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12431 for (const auto &Pair : It->DeclToUniqueName) { 12432 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12433 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12434 continue; 12435 auto I = LPCI->getSecond().find(Pair.first); 12436 assert(I != LPCI->getSecond().end() && 12437 "Lastprivate must be rehistered already."); 12438 // bool Cmp = priv_a.Fired != 0; 12439 LValue BaseLVal = std::get<3>(I->getSecond()); 12440 LValue FiredLVal = 12441 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12442 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12443 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12444 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12445 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12446 // if (Cmp) { 12447 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12448 CGF.EmitBlock(ThenBB); 
12449 Address Addr = CGF.GetAddrOfLocalVar(VD); 12450 LValue LVal; 12451 if (VD->getType()->isReferenceType()) 12452 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12453 AlignmentSource::Decl); 12454 else 12455 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12456 AlignmentSource::Decl); 12457 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12458 D.getBeginLoc()); 12459 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12460 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12461 // } 12462 } 12463 } 12464 12465 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12466 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12467 SourceLocation Loc) { 12468 if (CGF.getLangOpts().OpenMP < 50) 12469 return; 12470 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12471 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12472 "Unknown lastprivate conditional variable."); 12473 StringRef UniqueName = It->second; 12474 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12475 // The variable was not updated in the region - exit. 
12476 if (!GV) 12477 return; 12478 LValue LPLVal = CGF.MakeAddrLValue( 12479 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12480 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12481 CGF.EmitStoreOfScalar(Res, PrivLVal); 12482 } 12483 12484 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12485 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12486 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12487 llvm_unreachable("Not supported in SIMD-only mode"); 12488 } 12489 12490 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12491 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12492 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12493 llvm_unreachable("Not supported in SIMD-only mode"); 12494 } 12495 12496 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12497 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12498 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12499 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12500 bool Tied, unsigned &NumberOfParts) { 12501 llvm_unreachable("Not supported in SIMD-only mode"); 12502 } 12503 12504 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12505 SourceLocation Loc, 12506 llvm::Function *OutlinedFn, 12507 ArrayRef<llvm::Value *> CapturedVars, 12508 const Expr *IfCond) { 12509 llvm_unreachable("Not supported in SIMD-only mode"); 12510 } 12511 12512 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12513 CodeGenFunction &CGF, StringRef CriticalName, 12514 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12515 const Expr *Hint) { 12516 llvm_unreachable("Not supported in SIMD-only mode"); 12517 } 12518 12519 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12520 const RegionCodeGenTy &MasterOpGen, 12521 SourceLocation Loc) { 12522 llvm_unreachable("Not supported in SIMD-only mode"); 12523 } 12524 12525 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12526 SourceLocation Loc) { 12527 llvm_unreachable("Not supported in SIMD-only mode"); 12528 } 12529 12530 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12531 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12532 SourceLocation Loc) { 12533 llvm_unreachable("Not supported in SIMD-only mode"); 12534 } 12535 12536 void CGOpenMPSIMDRuntime::emitSingleRegion( 12537 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12538 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12539 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12540 ArrayRef<const Expr *> AssignmentOps) { 12541 llvm_unreachable("Not supported in SIMD-only mode"); 12542 } 12543 12544 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12545 const RegionCodeGenTy &OrderedOpGen, 12546 SourceLocation Loc, 12547 bool IsThreads) { 12548 llvm_unreachable("Not supported in SIMD-only mode"); 12549 } 12550 12551 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12552 SourceLocation Loc, 12553 OpenMPDirectiveKind Kind, 12554 bool EmitChecks, 12555 bool ForceSimpleCall) { 12556 llvm_unreachable("Not supported in SIMD-only mode"); 12557 } 12558 12559 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12560 CodeGenFunction &CGF, SourceLocation Loc, 12561 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12562 bool Ordered, const DispatchRTInput &DispatchValues) { 12563 llvm_unreachable("Not supported in SIMD-only mode"); 12564 } 12565 12566 void CGOpenMPSIMDRuntime::emitForStaticInit( 12567 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12568 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12569 llvm_unreachable("Not supported in SIMD-only mode"); 12570 } 12571 12572 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12573 CodeGenFunction &CGF, SourceLocation Loc, 12574 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12575 llvm_unreachable("Not supported in SIMD-only mode"); 12576 } 12577 12578 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12579 SourceLocation Loc, 12580 unsigned IVSize, 12581 bool IVSigned) { 12582 llvm_unreachable("Not supported in SIMD-only mode"); 12583 } 12584 12585 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12586 SourceLocation Loc, 12587 OpenMPDirectiveKind DKind) { 12588 llvm_unreachable("Not supported in SIMD-only mode"); 12589 } 12590 12591 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12592 SourceLocation Loc, 12593 unsigned IVSize, bool IVSigned, 12594 Address IL, Address LB, 12595 Address UB, Address ST) { 12596 llvm_unreachable("Not supported in SIMD-only mode"); 12597 } 12598 12599 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12600 llvm::Value *NumThreads, 12601 SourceLocation Loc) { 12602 llvm_unreachable("Not supported in SIMD-only mode"); 12603 } 12604 12605 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12606 ProcBindKind ProcBind, 12607 SourceLocation Loc) { 12608 llvm_unreachable("Not supported in SIMD-only mode"); 12609 } 12610 12611 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12612 const VarDecl *VD, 12613 Address VDAddr, 12614 SourceLocation Loc) { 12615 llvm_unreachable("Not supported in SIMD-only mode"); 12616 } 12617 12618 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12619 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12620 CodeGenFunction *CGF) { 12621 llvm_unreachable("Not supported in SIMD-only mode"); 12622 } 12623 12624 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12625 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12626 llvm_unreachable("Not supported in SIMD-only mode"); 12627 } 12628 12629 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12630 ArrayRef<const Expr *> 
Vars, 12631 SourceLocation Loc, 12632 llvm::AtomicOrdering AO) { 12633 llvm_unreachable("Not supported in SIMD-only mode"); 12634 } 12635 12636 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12637 const OMPExecutableDirective &D, 12638 llvm::Function *TaskFunction, 12639 QualType SharedsTy, Address Shareds, 12640 const Expr *IfCond, 12641 const OMPTaskDataTy &Data) { 12642 llvm_unreachable("Not supported in SIMD-only mode"); 12643 } 12644 12645 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12646 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12647 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12648 const Expr *IfCond, const OMPTaskDataTy &Data) { 12649 llvm_unreachable("Not supported in SIMD-only mode"); 12650 } 12651 12652 void CGOpenMPSIMDRuntime::emitReduction( 12653 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12654 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12655 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12656 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12657 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12658 ReductionOps, Options); 12659 } 12660 12661 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12662 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12663 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12664 llvm_unreachable("Not supported in SIMD-only mode"); 12665 } 12666 12667 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12668 SourceLocation Loc, 12669 bool IsWorksharingReduction) { 12670 llvm_unreachable("Not supported in SIMD-only mode"); 12671 } 12672 12673 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12674 SourceLocation Loc, 12675 ReductionCodeGen &RCG, 12676 unsigned N) { 12677 llvm_unreachable("Not supported in SIMD-only mode"); 12678 } 12679 
12680 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12681 SourceLocation Loc, 12682 llvm::Value *ReductionsPtr, 12683 LValue SharedLVal) { 12684 llvm_unreachable("Not supported in SIMD-only mode"); 12685 } 12686 12687 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12688 SourceLocation Loc) { 12689 llvm_unreachable("Not supported in SIMD-only mode"); 12690 } 12691 12692 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12693 CodeGenFunction &CGF, SourceLocation Loc, 12694 OpenMPDirectiveKind CancelRegion) { 12695 llvm_unreachable("Not supported in SIMD-only mode"); 12696 } 12697 12698 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12699 SourceLocation Loc, const Expr *IfCond, 12700 OpenMPDirectiveKind CancelRegion) { 12701 llvm_unreachable("Not supported in SIMD-only mode"); 12702 } 12703 12704 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12705 const OMPExecutableDirective &D, StringRef ParentName, 12706 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12707 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12708 llvm_unreachable("Not supported in SIMD-only mode"); 12709 } 12710 12711 void CGOpenMPSIMDRuntime::emitTargetCall( 12712 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12713 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12714 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12715 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12716 const OMPLoopDirective &D)> 12717 SizeEmitter) { 12718 llvm_unreachable("Not supported in SIMD-only mode"); 12719 } 12720 12721 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12722 llvm_unreachable("Not supported in SIMD-only mode"); 12723 } 12724 12725 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12726 llvm_unreachable("Not supported in SIMD-only mode"); 12727 } 12728 12729 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12730 return false; 12731 } 12732 12733 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12734 const OMPExecutableDirective &D, 12735 SourceLocation Loc, 12736 llvm::Function *OutlinedFn, 12737 ArrayRef<llvm::Value *> CapturedVars) { 12738 llvm_unreachable("Not supported in SIMD-only mode"); 12739 } 12740 12741 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12742 const Expr *NumTeams, 12743 const Expr *ThreadLimit, 12744 SourceLocation Loc) { 12745 llvm_unreachable("Not supported in SIMD-only mode"); 12746 } 12747 12748 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12749 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12750 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12751 llvm_unreachable("Not supported in SIMD-only mode"); 12752 } 12753 12754 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12755 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12756 const Expr *Device) { 12757 llvm_unreachable("Not supported in SIMD-only mode"); 12758 } 12759 12760 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12761 const OMPLoopDirective &D, 12762 ArrayRef<Expr *> NumIterations) { 12763 llvm_unreachable("Not supported in SIMD-only mode"); 12764 } 12765 12766 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12767 const OMPDependClause *C) { 12768 llvm_unreachable("Not supported in SIMD-only mode"); 12769 } 12770 12771 const VarDecl * 12772 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12773 const VarDecl *NativeParam) const { 12774 llvm_unreachable("Not supported in SIMD-only mode"); 12775 } 12776 12777 Address 12778 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12779 const VarDecl *NativeParam, 12780 const VarDecl *TargetParam) const { 12781 llvm_unreachable("Not supported in SIMD-only mode"); 12782 } 12783