//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that capture an associated statement \p CS
  /// (outlined 'parallel'/'task'/'target' regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a resume point for untied tasks; no-op by default, overridden by
  /// task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return true if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Discriminator for the concrete region kind (see classof in subclasses).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// Whether the region may contain a 'cancel' directive.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the resume-switch machinery needed for
  /// untied tasks: a switch over the part id that jumps to the last reached
  /// task-switching point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: inverted from the Tied ctor arg).
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Extra code emitted at each task-switching point (e.g. bookkeeping).
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; cases are added as switching points appear.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a task-switching point: store the next part id, run the extra
    /// codegen, return to the caller, and register the resume block as a new
    /// case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the untied task was split into (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter. Delegates to the enclosing
  // region since an inlined region has no context of its own.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// \return the captured-statement info that was active before this inlined
  /// region was entered (restored by InlinedOpenMPRegionRAII).
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};

/// Placeholder region codegen callback; must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture state of CGF, cleared for the duration of the
  /// region and restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() is run when the cleanup fires; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if there is no valid insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the region codegen callback. If a pre/post action is attached, its
/// Exit() is pushed as a normal-and-EH cleanup so it runs on both the normal
/// path and exception paths.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initializer of \p Private for a user-defined reduction.
/// If the 'declare reduction' \p DRD has an initializer expression, emit it
/// with the LHS/RHS placeholders mapped to \p Private / \p Original;
/// otherwise default-initialize \p Private from a zero-filled global of
/// type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared reduction expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit an lvalue for the upper bound of an array section; returns an invalid
/// LValue if \p E is not an array section.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

/// Build per-clause reduction data; all four arrays are expected to be
/// parallel (same length as \p Shareds).
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  // When the original expression is the same as the shared one, reuse the
  // already emitted lvalues.
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: size is known statically, no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Number of elements = (UB - LB) + 1; size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the private copy of reduction item \p N has a non-trivial
/// destructor and therefore needs a cleanup.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointer/reference levels of \p BaseLV until \p BaseTy matches
/// \p ElTy, then return the resulting lvalue cast to \p ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
      Address(Addr, BaseLVAlignment);
}

// Returns the base VarDecl of an array-section or array-subscript expression
// Ref (nullptr if Ref is neither); DE receives the DeclRefExpr of that base.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    // Strip nested sections and subscripts to reach the underlying decl.
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

// Adjusts the private copy's address for reduction item N so it sits at the
// same offset within the private storage as the shared item does within its
// base variable (relevant for array sections/subscripts).
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the shared item from the beginning of its base variable.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Not a section/subscript: the reference itself names the base decl.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

// True if reduction item N has a user-defined reduction initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

// The thread id is passed to outlined regions as kmp_int32 *; load it and
// return an lvalue for the pointed-to value.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Wrap the region body in a terminate scope: per the rules above,
  // exceptions may not escape the structured block.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

// In task regions the thread id variable is a plain local (not a pointer), so
// no pointer load is needed.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

// Appends a new public, non-mutable, unnamed field of type FieldTy to the
// record DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions, and declarations that are still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

// Joins Parts into a single name: the first part is prefixed with
// FirstSeparator, every subsequent part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

// Emits the outlined combiner (or initializer) function of a user-defined
// reduction, mapping the omp_in/omp_out (or omp_orig/omp_priv) variables to
// the function's pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
          "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // The helper is small - force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, emit the 'out' variable's own initializer first (unless
  // it is trivial).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emits (at most once) the combiner and optional initializer functions for
// the user-defined reduction D, caches them in UDRMap, and records the
// association with the current function when CGF is provided.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

// Returns the cached {combiner, initializer} pair for D, emitting it first if
// it has not been emitted yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

// Outlines the body of a 'parallel' or 'teams' region into a separate
// function. Determines whether the directive may be cancelled so that
// cancellation handling is set up correctly.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Every directive kind that can carry a 'cancel' inside a parallel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

// Outlines the body of a 'task' (or taskloop) region. For untied tasks the
// generated code re-enqueues the task via __kmpc_omp_task between task parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
      llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the service point right after the allocas so values emitted there
    // dominate the rest of the function.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

// Removes the service insert point for the current function, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

// Builds the ";file;function;line;column;;" ident string for Loc into Buffer
// and returns a view of it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

// Returns an ident_t describing Loc with the given flags, or the default
// location when debug info is disabled or Loc is invalid.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

// Returns the global thread id value for the current function at Loc, caching
// it per function; emits a __kmpc_global_thread_num call when no cached value
// or outlined-region parameter is available.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The parameter load is only reused when it is safe w.r.t. exception
      // handling: either no landing pads are required, or the pointer is
      // available in the entry block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Returns the __kmpc_for_static_init_{4,4u,8,8u} runtime entry matching the
// iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Returns the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry with #line directives ignored before reporting a failure.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

// Returns the address of the reference pointer global used for a
// declare-target 'link' variable (or 'to' under unified shared memory),
// creating it lazily.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1669 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1670 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1671 HasRequiresUnifiedSharedMemory))) { 1672 SmallString<64> PtrName; 1673 { 1674 llvm::raw_svector_ostream OS(PtrName); 1675 OS << CGM.getMangledName(GlobalDecl(VD)); 1676 if (!VD->isExternallyVisible()) { 1677 unsigned DeviceID, FileID, Line; 1678 getTargetEntryUniqueInfo(CGM.getContext(), 1679 VD->getCanonicalDecl()->getBeginLoc(), 1680 DeviceID, FileID, Line); 1681 OS << llvm::format("_%x", FileID); 1682 } 1683 OS << "_decl_tgt_ref_ptr"; 1684 } 1685 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1686 if (!Ptr) { 1687 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1688 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1689 PtrName); 1690 1691 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1692 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1693 1694 if (!CGM.getLangOpts().OpenMPIsDevice) 1695 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1696 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1697 } 1698 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1699 } 1700 return Address::invalid(); 1701 } 1702 1703 llvm::Constant * 1704 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1705 assert(!CGM.getLangOpts().OpenMPUseTLS || 1706 !CGM.getContext().getTargetInfo().isTLSSupported()); 1707 // Lookup the entry, lazily creating it if necessary. 
1708 std::string Suffix = getName({"cache", ""}); 1709 return getOrCreateInternalVariable( 1710 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1711 } 1712 1713 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1714 const VarDecl *VD, 1715 Address VDAddr, 1716 SourceLocation Loc) { 1717 if (CGM.getLangOpts().OpenMPUseTLS && 1718 CGM.getContext().getTargetInfo().isTLSSupported()) 1719 return VDAddr; 1720 1721 llvm::Type *VarTy = VDAddr.getElementType(); 1722 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1723 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1724 CGM.Int8PtrTy), 1725 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1726 getOrCreateThreadPrivateCache(VD)}; 1727 return Address(CGF.EmitRuntimeCall( 1728 OMPBuilder.getOrCreateRuntimeFunction( 1729 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1730 Args), 1731 VDAddr.getAlignment()); 1732 } 1733 1734 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1735 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1736 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1737 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1738 // library. 1739 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1740 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1741 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1742 OMPLoc); 1743 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1744 // to register constructor/destructor for variable. 
1745 llvm::Value *Args[] = { 1746 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1747 Ctor, CopyCtor, Dtor}; 1748 CGF.EmitRuntimeCall( 1749 OMPBuilder.getOrCreateRuntimeFunction( 1750 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1751 Args); 1752 } 1753 1754 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1755 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1756 bool PerformInit, CodeGenFunction *CGF) { 1757 if (CGM.getLangOpts().OpenMPUseTLS && 1758 CGM.getContext().getTargetInfo().isTLSSupported()) 1759 return nullptr; 1760 1761 VD = VD->getDefinition(CGM.getContext()); 1762 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1763 QualType ASTTy = VD->getType(); 1764 1765 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1766 const Expr *Init = VD->getAnyInitializer(); 1767 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1768 // Generate function that re-emits the declaration's initializer into the 1769 // threadprivate copy of the variable VD 1770 CodeGenFunction CtorCGF(CGM); 1771 FunctionArgList Args; 1772 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1773 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1774 ImplicitParamDecl::Other); 1775 Args.push_back(&Dst); 1776 1777 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1778 CGM.getContext().VoidPtrTy, Args); 1779 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1780 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1781 llvm::Function *Fn = 1782 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1783 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1784 Args, Loc, Loc); 1785 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1786 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1787 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1788 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1789 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1790 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1791 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1792 /*IsInitializer=*/true); 1793 ArgVal = CtorCGF.EmitLoadOfScalar( 1794 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1795 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1796 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1797 CtorCGF.FinishFunction(); 1798 Ctor = Fn; 1799 } 1800 if (VD->getType().isDestructedType() != QualType::DK_none) { 1801 // Generate function that emits destructor call for the threadprivate copy 1802 // of the variable VD 1803 CodeGenFunction DtorCGF(CGM); 1804 FunctionArgList Args; 1805 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1806 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1807 ImplicitParamDecl::Other); 1808 Args.push_back(&Dst); 1809 1810 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1811 CGM.getContext().VoidTy, Args); 1812 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1813 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1814 llvm::Function *Fn = 1815 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1816 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1817 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1818 Loc, Loc); 1819 // Create a scope with an artificial location for the body of this function. 1820 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1821 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1822 DtorCGF.GetAddrOfLocalVar(&Dst), 1823 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1824 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1825 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1826 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1827 DtorCGF.FinishFunction(); 1828 Dtor = Fn; 1829 } 1830 // Do not emit init function if it is not required. 
1831 if (!Ctor && !Dtor) 1832 return nullptr; 1833 1834 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1835 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1836 /*isVarArg=*/false) 1837 ->getPointerTo(); 1838 // Copying constructor for the threadprivate variable. 1839 // Must be NULL - reserved by runtime, but currently it requires that this 1840 // parameter is always NULL. Otherwise it fires assertion. 1841 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1842 if (Ctor == nullptr) { 1843 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1844 /*isVarArg=*/false) 1845 ->getPointerTo(); 1846 Ctor = llvm::Constant::getNullValue(CtorTy); 1847 } 1848 if (Dtor == nullptr) { 1849 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1850 /*isVarArg=*/false) 1851 ->getPointerTo(); 1852 Dtor = llvm::Constant::getNullValue(DtorTy); 1853 } 1854 if (!CGF) { 1855 auto *InitFunctionTy = 1856 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1857 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1858 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1859 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1860 CodeGenFunction InitCGF(CGM); 1861 FunctionArgList ArgList; 1862 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1863 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1864 Loc, Loc); 1865 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1866 InitCGF.FinishFunction(); 1867 return InitFunction; 1868 } 1869 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1870 } 1871 return nullptr; 1872 } 1873 1874 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1875 llvm::GlobalVariable *Addr, 1876 bool PerformInit) { 1877 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1878 !CGM.getLangOpts().OpenMPIsDevice) 1879 return false; 1880 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1881 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1882 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1884 HasRequiresUnifiedSharedMemory)) 1885 return CGM.getLangOpts().OpenMPIsDevice; 1886 VD = VD->getDefinition(CGM.getContext()); 1887 assert(VD && "Unknown VarDecl"); 1888 1889 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1890 return CGM.getLangOpts().OpenMPIsDevice; 1891 1892 QualType ASTTy = VD->getType(); 1893 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1894 1895 // Produce the unique prefix to identify the new target regions. We use 1896 // the source location of the variable declaration which we know to not 1897 // conflict with any target region. 1898 unsigned DeviceID; 1899 unsigned FileID; 1900 unsigned Line; 1901 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1902 SmallString<128> Buffer, Out; 1903 { 1904 llvm::raw_svector_ostream OS(Buffer); 1905 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1906 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1907 } 1908 1909 const Expr *Init = VD->getAnyInitializer(); 1910 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1911 llvm::Constant *Ctor; 1912 llvm::Constant *ID; 1913 if (CGM.getLangOpts().OpenMPIsDevice) { 1914 // Generate function that re-emits the declaration's initializer into 1915 // the threadprivate copy of the variable VD 1916 CodeGenFunction CtorCGF(CGM); 1917 1918 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1919 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1920 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1921 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1922 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1923 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1924 FunctionArgList(), Loc, Loc); 1925 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1926 CtorCGF.EmitAnyExprToMem(Init, 1927 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1928 Init->getType().getQualifiers(), 1929 /*IsInitializer=*/true); 1930 CtorCGF.FinishFunction(); 1931 Ctor = Fn; 1932 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1933 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1934 } else { 1935 Ctor = new llvm::GlobalVariable( 1936 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1937 llvm::GlobalValue::PrivateLinkage, 1938 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1939 ID = Ctor; 1940 } 1941 1942 // Register the information for the entry associated with the constructor. 1943 Out.clear(); 1944 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1945 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1946 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1947 } 1948 if (VD->getType().isDestructedType() != QualType::DK_none) { 1949 llvm::Constant *Dtor; 1950 llvm::Constant *ID; 1951 if (CGM.getLangOpts().OpenMPIsDevice) { 1952 // Generate function that emits destructor call for the threadprivate 1953 // copy of the variable VD 1954 CodeGenFunction DtorCGF(CGM); 1955 1956 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1957 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1958 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1959 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1960 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1961 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1962 FunctionArgList(), Loc, Loc); 1963 // Create a scope with an artificial location for the body of this 1964 // function. 
1965 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1966 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1967 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1968 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1969 DtorCGF.FinishFunction(); 1970 Dtor = Fn; 1971 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1972 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1973 } else { 1974 Dtor = new llvm::GlobalVariable( 1975 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1976 llvm::GlobalValue::PrivateLinkage, 1977 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1978 ID = Dtor; 1979 } 1980 // Register the information for the entry associated with the destructor. 1981 Out.clear(); 1982 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1983 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1984 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1985 } 1986 return CGM.getLangOpts().OpenMPIsDevice; 1987 } 1988 1989 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1990 QualType VarType, 1991 StringRef Name) { 1992 std::string Suffix = getName({"artificial", ""}); 1993 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1994 llvm::Value *GAddr = 1995 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1996 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1997 CGM.getTarget().isTLSSupported()) { 1998 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1999 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2000 } 2001 std::string CacheSuffix = getName({"cache", ""}); 2002 llvm::Value *Args[] = { 2003 emitUpdateLocation(CGF, SourceLocation()), 2004 getThreadID(CGF, SourceLocation()), 2005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2006 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2007 /*isSigned=*/false), 2008 
getOrCreateInternalVariable( 2009 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2010 return Address( 2011 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2012 CGF.EmitRuntimeCall( 2013 OMPBuilder.getOrCreateRuntimeFunction( 2014 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2015 Args), 2016 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2017 CGM.getContext().getTypeAlignInChars(VarType)); 2018 } 2019 2020 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2021 const RegionCodeGenTy &ThenGen, 2022 const RegionCodeGenTy &ElseGen) { 2023 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2024 2025 // If the condition constant folds and can be elided, try to avoid emitting 2026 // the condition and the dead arm of the if/else. 2027 bool CondConstant; 2028 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2029 if (CondConstant) 2030 ThenGen(CGF); 2031 else 2032 ElseGen(CGF); 2033 return; 2034 } 2035 2036 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2037 // emit the conditional branch. 2038 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2039 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2040 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2041 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2042 2043 // Emit the 'then' code. 2044 CGF.EmitBlock(ThenBlock); 2045 ThenGen(CGF); 2046 CGF.EmitBranch(ContBlock); 2047 // Emit the 'else' code if present. 2048 // There is no need to emit line number for unconditional branch. 2049 (void)ApplyDebugLocation::CreateEmpty(CGF); 2050 CGF.EmitBlock(ElseBlock); 2051 ElseGen(CGF); 2052 // There is no need to emit line number for unconditional branch. 2053 (void)ApplyDebugLocation::CreateEmpty(CGF); 2054 CGF.EmitBranch(ContBlock); 2055 // Emit the continuation block for code after the if. 
2056 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2057 } 2058 2059 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2060 llvm::Function *OutlinedFn, 2061 ArrayRef<llvm::Value *> CapturedVars, 2062 const Expr *IfCond) { 2063 if (!CGF.HaveInsertPoint()) 2064 return; 2065 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2066 auto &M = CGM.getModule(); 2067 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2068 this](CodeGenFunction &CGF, PrePostActionTy &) { 2069 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2070 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2071 llvm::Value *Args[] = { 2072 RTLoc, 2073 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2074 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2075 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2076 RealArgs.append(std::begin(Args), std::end(Args)); 2077 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2078 2079 llvm::FunctionCallee RTLFn = 2080 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2081 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2082 }; 2083 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2084 this](CodeGenFunction &CGF, PrePostActionTy &) { 2085 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2086 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2087 // Build calls: 2088 // __kmpc_serialized_parallel(&Loc, GTid); 2089 llvm::Value *Args[] = {RTLoc, ThreadID}; 2090 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2091 M, OMPRTL___kmpc_serialized_parallel), 2092 Args); 2093 2094 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2095 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2096 Address ZeroAddrBound = 2097 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2098 /*Name=*/".bound.zero.addr"); 2099 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2100 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2101 // ThreadId 
for serialized parallels is 0. 2102 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2103 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2104 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2105 2106 // Ensure we do not inline the function. This is trivially true for the ones 2107 // passed to __kmpc_fork_call but the ones calles in serialized regions 2108 // could be inlined. This is not a perfect but it is closer to the invariant 2109 // we want, namely, every data environment starts with a new function. 2110 // TODO: We should pass the if condition to the runtime function and do the 2111 // handling there. Much cleaner code. 2112 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2113 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2114 2115 // __kmpc_end_serialized_parallel(&Loc, GTid); 2116 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2117 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2118 M, OMPRTL___kmpc_end_serialized_parallel), 2119 EndArgs); 2120 }; 2121 if (IfCond) { 2122 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2123 } else { 2124 RegionCodeGenTy ThenRCG(ThenGen); 2125 ThenRCG(CGF); 2126 } 2127 } 2128 2129 // If we're inside an (outlined) parallel region, use the region info's 2130 // thread-ID variable (it is passed in a first argument of the outlined function 2131 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2132 // regular serial code region, get thread ID by calling kmp_int32 2133 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2134 // return the address of that temp. 
2135 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2136 SourceLocation Loc) { 2137 if (auto *OMPRegionInfo = 2138 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2139 if (OMPRegionInfo->getThreadIDVariable()) 2140 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2141 2142 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2143 QualType Int32Ty = 2144 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2145 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2146 CGF.EmitStoreOfScalar(ThreadID, 2147 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2148 2149 return ThreadIDTemp; 2150 } 2151 2152 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2153 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2154 SmallString<256> Buffer; 2155 llvm::raw_svector_ostream Out(Buffer); 2156 Out << Name; 2157 StringRef RuntimeName = Out.str(); 2158 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2159 if (Elem.second) { 2160 assert(Elem.second->getType()->getPointerElementType() == Ty && 2161 "OMP internal variable has different type than requested"); 2162 return &*Elem.second; 2163 } 2164 2165 return Elem.second = new llvm::GlobalVariable( 2166 CGM.getModule(), Ty, /*IsConstant*/ false, 2167 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2168 Elem.first(), /*InsertBefore=*/nullptr, 2169 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2170 } 2171 2172 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2173 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2174 std::string Name = getName({Prefix, "var"}); 2175 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2176 } 2177 2178 namespace { 2179 /// Common pre(post)-action for different OpenMP constructs. 
2180 class CommonActionTy final : public PrePostActionTy { 2181 llvm::FunctionCallee EnterCallee; 2182 ArrayRef<llvm::Value *> EnterArgs; 2183 llvm::FunctionCallee ExitCallee; 2184 ArrayRef<llvm::Value *> ExitArgs; 2185 bool Conditional; 2186 llvm::BasicBlock *ContBlock = nullptr; 2187 2188 public: 2189 CommonActionTy(llvm::FunctionCallee EnterCallee, 2190 ArrayRef<llvm::Value *> EnterArgs, 2191 llvm::FunctionCallee ExitCallee, 2192 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2193 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2194 ExitArgs(ExitArgs), Conditional(Conditional) {} 2195 void Enter(CodeGenFunction &CGF) override { 2196 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2197 if (Conditional) { 2198 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2199 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2200 ContBlock = CGF.createBasicBlock("omp_if.end"); 2201 // Generate the branch (If-stmt) 2202 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2203 CGF.EmitBlock(ThenBlock); 2204 } 2205 } 2206 void Done(CodeGenFunction &CGF) { 2207 // Emit the rest of blocks/branches 2208 CGF.EmitBranch(ContBlock); 2209 CGF.EmitBlock(ContBlock, true); 2210 } 2211 void Exit(CodeGenFunction &CGF) override { 2212 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2213 } 2214 }; 2215 } // anonymous namespace 2216 2217 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2218 StringRef CriticalName, 2219 const RegionCodeGenTy &CriticalOpGen, 2220 SourceLocation Loc, const Expr *Hint) { 2221 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2222 // CriticalOpGen(); 2223 // __kmpc_end_critical(ident_t *, gtid, Lock); 2224 // Prepare arguments and build a call to __kmpc_critical 2225 if (!CGF.HaveInsertPoint()) 2226 return; 2227 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2228 getCriticalRegionLock(CriticalName)}; 2229 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2230 std::end(Args)); 2231 if (Hint) { 2232 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2233 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2234 } 2235 CommonActionTy Action( 2236 OMPBuilder.getOrCreateRuntimeFunction( 2237 CGM.getModule(), 2238 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2239 EnterArgs, 2240 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2241 OMPRTL___kmpc_end_critical), 2242 Args); 2243 CriticalOpGen.setAction(Action); 2244 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2245 } 2246 2247 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2248 const RegionCodeGenTy &MasterOpGen, 2249 SourceLocation Loc) { 2250 if (!CGF.HaveInsertPoint()) 2251 return; 2252 // if(__kmpc_master(ident_t *, gtid)) { 2253 // MasterOpGen(); 2254 // __kmpc_end_master(ident_t *, gtid); 2255 // } 2256 // Prepare arguments and build a call to __kmpc_master 2257 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2258 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2259 CGM.getModule(), OMPRTL___kmpc_master), 2260 Args, 2261 OMPBuilder.getOrCreateRuntimeFunction( 2262 CGM.getModule(), OMPRTL___kmpc_end_master), 2263 Args, 2264 /*Conditional=*/true); 2265 MasterOpGen.setAction(Action); 2266 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2267 Action.Done(CGF); 2268 } 2269 2270 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2271 SourceLocation Loc) { 2272 if (!CGF.HaveInsertPoint()) 2273 return; 2274 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2275 OMPBuilder.createTaskyield(CGF.Builder); 2276 } else { 2277 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2278 llvm::Value *Args[] = { 2279 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2280 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2281 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2282 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2283 Args); 2284 } 2285 2286 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2287 Region->emitUntiedSwitch(CGF); 2288 } 2289 2290 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2291 const RegionCodeGenTy &TaskgroupOpGen, 2292 SourceLocation Loc) { 2293 if (!CGF.HaveInsertPoint()) 2294 return; 2295 // __kmpc_taskgroup(ident_t *, gtid); 2296 // TaskgroupOpGen(); 2297 // __kmpc_end_taskgroup(ident_t *, gtid); 2298 // Prepare arguments and build a call to __kmpc_taskgroup 2299 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2300 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2301 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2302 Args, 2303 OMPBuilder.getOrCreateRuntimeFunction( 2304 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2305 Args); 2306 TaskgroupOpGen.setAction(Action); 2307 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2308 } 2309 2310 /// Given an array of pointers to variables, project the address of a 2311 /// given variable. 2312 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2313 unsigned Index, const VarDecl *Var) { 2314 // Pull out the pointer to the variable. 
2315 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2316 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2317 2318 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2319 Addr = CGF.Builder.CreateElementBitCast( 2320 Addr, CGF.ConvertTypeForMem(Var->getType())); 2321 return Addr; 2322 } 2323 2324 static llvm::Value *emitCopyprivateCopyFunction( 2325 CodeGenModule &CGM, llvm::Type *ArgsType, 2326 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2327 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2328 SourceLocation Loc) { 2329 ASTContext &C = CGM.getContext(); 2330 // void copy_func(void *LHSArg, void *RHSArg); 2331 FunctionArgList Args; 2332 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2333 ImplicitParamDecl::Other); 2334 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2335 ImplicitParamDecl::Other); 2336 Args.push_back(&LHSArg); 2337 Args.push_back(&RHSArg); 2338 const auto &CGFI = 2339 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2340 std::string Name = 2341 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2342 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2343 llvm::GlobalValue::InternalLinkage, Name, 2344 &CGM.getModule()); 2345 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2346 Fn->setDoesNotRecurse(); 2347 CodeGenFunction CGF(CGM); 2348 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2349 // Dest = (void*[n])(LHSArg); 2350 // Src = (void*[n])(RHSArg); 2351 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2352 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2353 ArgsType), CGF.getPointerAlign()); 2354 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2355 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2356 ArgsType), CGF.getPointerAlign()); 2357 // *(Type0*)Dst[0] = 
*(Type0*)Src[0];
  //   *(Type1*)Dst[1] = *(Type1*)Src[1];
  //   ...
  //   *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

// Emit an OpenMP 'single' region: only one thread executes SingleOpGen, guarded
// by __kmpc_single/__kmpc_end_single. When copyprivate variables are present,
// a flag (did_it) records which thread ran the region and __kmpc_copyprivate
// broadcasts the copyprivate values to the remaining threads using an emitted
// copy function.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate-related arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body only runs when __kmpc_single returns
  // true; __kmpc_end_single is emitted on the taken path.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

// Emit an OpenMP 'ordered' region. Only when IsThreads is set is the body
// bracketed by __kmpc_ordered/__kmpc_end_ordered; otherwise it is simply
// emitted inline.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

// Pick the ident_t barrier flag matching the directive kind so the runtime
// can tell which construct's implicit/explicit barrier this is.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags =
OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

// A worksharing loop carrying an ordered(n) clause is a doacross loop; force
// schedule(static, 1) for it, per the doacross lowering scheme.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

// Emit a barrier at the given location. When inside a cancellable region
// (and not ForceSimpleCall), __kmpc_cancel_barrier is used and, if EmitChecks,
// a nonzero result branches to the region's cancellation exit; otherwise a
// plain __kmpc_barrier is emitted. With -fopenmp-enable-irbuilder the
// OpenMPIRBuilder emits the barrier instead.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ?
OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

// True iff the schedule clause lowers to plain (non-chunked) static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

// True iff the dist_schedule clause lowers to plain (non-chunked) static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

// True iff the schedule clause lowers to chunked static.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

// True iff the dist_schedule clause lowers to chunked static.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

// True for any schedule other than non-chunked static (i.e. schedules that
// require the dynamic dispatch runtime entry points).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic schedule-clause modifiers into the runtime
/// schedule value. The 'simd' modifier upgrades static_chunked to
/// static_balanced_chunked; if both M1 and M2 set a modifier, M2 wins (the
/// second switch overwrites Modifier). Returns Schedule | Modifier.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

// Emit the __kmpc_dispatch_init call that starts a dynamically scheduled
// worksharing loop (any schedule except the static family, unless ordered).
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ?
DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

// Shared helper for emitForStaticInit/emitDistributeStaticInit: build the
// argument list and emit the __kmpc_for_static_init call. The schedule must
// be one of the static variants; a missing chunk defaults to 1.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

// Emit the static-schedule initialization for a worksharing loop or sections
// construct; the ident_t flags distinguish loop vs. sections work.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ?
OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

// Emit the static-schedule initialization for a 'distribute' construct.
// No schedule modifiers apply to dist_schedule.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

// Emit __kmpc_for_static_fini to close a statically scheduled worksharing
// region; the ident_t flags encode distribute vs. loop vs. sections.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

// Notify the runtime that one ordered iteration of a dynamically scheduled
// loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

// Fetch the next chunk of a dynamically scheduled loop; returns the runtime
// result converted to a bool (nonzero = more work available).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

// Lower a num_threads clause: push the requested thread count (cast to i32)
// to the runtime before the parallel region is entered.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

// Lower a proc_bind clause by pushing the binding policy to the runtime.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

// Emit a 'flush' construct. The variable list is currently unused; the flush
// is emitted either through the OpenMPIRBuilder or as a __kmpc_flush call.
// NOTE(review): the AtomicOrdering parameter is accepted but not consumed
// here - confirm intended semantics with callers.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
KmpTaskTReductions,
};
} // anonymous namespace

// True when no offload entries (target regions or device globals) have been
// recorded yet.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Entry starts with null address/ID; they are filled in on registration.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

// Register address/ID/flags for a target region entry, creating it first on
// the host side (on the device it may already exist from metadata loading).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                      OffloadingEntriesNum);
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

// Check whether an entry exists for (DeviceID, FileID, ParentName, LineNum).
// Unless IgnoreAddressId, an entry whose address or ID is already set counts
// as "not available" (see the trailing comment).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

// Create a device-global-variable entry with only order and flags; address,
// size and linkage are filled in on registration.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

// Register address/size/linkage for a declare-target global variable entry,
// initializing it first when compiling for the device.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only fill in a previously-unknown size
      // and linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

// Emit one __tgt_offload_entry descriptor (ID, name string, size, flags) into
// the "omp_offloading_entries" section so the offload linker can collect it.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the file's
        // unique (device, file) ID against the source manager's file table.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now that all entries are collected in creation order, validate them and
  // emit the actual __tgt_offload_entry descriptors.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3281 3282 if (!CGM.getLangOpts().OpenMPIsDevice) 3283 return; 3284 3285 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3286 return; 3287 3288 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3289 if (auto EC = Buf.getError()) { 3290 CGM.getDiags().Report(diag::err_cannot_open_file) 3291 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3292 return; 3293 } 3294 3295 llvm::LLVMContext C; 3296 auto ME = expectedToErrorOrAndEmitErrors( 3297 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3298 3299 if (auto EC = ME.getError()) { 3300 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3301 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3302 CGM.getDiags().Report(DiagID) 3303 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3304 return; 3305 } 3306 3307 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3308 if (!MD) 3309 return; 3310 3311 for (llvm::MDNode *MN : MD->operands()) { 3312 auto &&GetMDInt = [MN](unsigned Idx) { 3313 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3314 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3315 }; 3316 3317 auto &&GetMDString = [MN](unsigned Idx) { 3318 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3319 return V->getString(); 3320 }; 3321 3322 switch (GetMDInt(0)) { 3323 default: 3324 llvm_unreachable("Unexpected metadata!"); 3325 break; 3326 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3327 OffloadingEntryInfoTargetRegion: 3328 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3329 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3330 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3331 /*Order=*/GetMDInt(5)); 3332 break; 3333 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3334 OffloadingEntryInfoDeviceGlobalVar: 3335 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3336 /*MangledName=*/GetMDString(1), 3337 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3338 /*Flags=*/GetMDInt(2)), 3339 /*Order=*/GetMDInt(3)); 3340 break; 3341 } 3342 } 3343 } 3344 3345 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3346 if (!KmpRoutineEntryPtrTy) { 3347 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3348 ASTContext &C = CGM.getContext(); 3349 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3350 FunctionProtoType::ExtProtoInfo EPI; 3351 KmpRoutineEntryPtrQTy = C.getPointerType( 3352 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3353 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3354 } 3355 } 3356 3357 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3358 // Make sure the type of the entry is already created. This is the type we 3359 // have to create: 3360 // struct __tgt_offload_entry{ 3361 // void *addr; // Pointer to the offload entry info. 3362 // // (function or global) 3363 // char *name; // Name of the function or global. 3364 // size_t size; // Size of the entry info (0 if it a function). 3365 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3366 // int32_t reserved; // Reserved, to use by the runtime library. 
3367 // }; 3368 if (TgtOffloadEntryQTy.isNull()) { 3369 ASTContext &C = CGM.getContext(); 3370 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3371 RD->startDefinition(); 3372 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3373 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3374 addFieldToRecordDecl(C, RD, C.getSizeType()); 3375 addFieldToRecordDecl( 3376 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3377 addFieldToRecordDecl( 3378 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3379 RD->completeDefinition(); 3380 RD->addAttr(PackedAttr::CreateImplicit(C)); 3381 TgtOffloadEntryQTy = C.getRecordType(RD); 3382 } 3383 return TgtOffloadEntryQTy; 3384 } 3385 3386 namespace { 3387 struct PrivateHelpersTy { 3388 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3389 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3390 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3391 PrivateElemInit(PrivateElemInit) {} 3392 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3393 const Expr *OriginalRef = nullptr; 3394 const VarDecl *Original = nullptr; 3395 const VarDecl *PrivateCopy = nullptr; 3396 const VarDecl *PrivateElemInit = nullptr; 3397 bool isLocalPrivate() const { 3398 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3399 } 3400 }; 3401 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3402 } // anonymous namespace 3403 3404 static bool isAllocatableDecl(const VarDecl *VD) { 3405 const VarDecl *CVD = VD->getCanonicalDecl(); 3406 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3407 return false; 3408 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3409 // Use the default allocation. 
3410 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3411 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3412 !AA->getAllocator()); 3413 } 3414 3415 static RecordDecl * 3416 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3417 if (!Privates.empty()) { 3418 ASTContext &C = CGM.getContext(); 3419 // Build struct .kmp_privates_t. { 3420 // /* private vars */ 3421 // }; 3422 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3423 RD->startDefinition(); 3424 for (const auto &Pair : Privates) { 3425 const VarDecl *VD = Pair.second.Original; 3426 QualType Type = VD->getType().getNonReferenceType(); 3427 // If the private variable is a local variable with lvalue ref type, 3428 // allocate the pointer instead of the pointee type. 3429 if (Pair.second.isLocalPrivate()) { 3430 if (VD->getType()->isLValueReferenceType()) 3431 Type = C.getPointerType(Type); 3432 if (isAllocatableDecl(VD)) 3433 Type = C.getPointerType(Type); 3434 } 3435 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3436 if (VD->hasAttrs()) { 3437 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3438 E(VD->getAttrs().end()); 3439 I != E; ++I) 3440 FD->addAttr(*I); 3441 } 3442 } 3443 RD->completeDefinition(); 3444 return RD; 3445 } 3446 return nullptr; 3447 } 3448 3449 static RecordDecl * 3450 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3451 QualType KmpInt32Ty, 3452 QualType KmpRoutineEntryPointerQTy) { 3453 ASTContext &C = CGM.getContext(); 3454 // Build struct kmp_task_t { 3455 // void * shareds; 3456 // kmp_routine_entry_t routine; 3457 // kmp_int32 part_id; 3458 // kmp_cmplrdata_t data1; 3459 // kmp_cmplrdata_t data2; 3460 // For taskloops additional fields: 3461 // kmp_uint64 lb; 3462 // kmp_uint64 ub; 3463 // kmp_int64 st; 3464 // kmp_int32 liter; 3465 // void * reductions; 3466 // }; 3467 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3468 
UD->startDefinition(); 3469 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3470 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3471 UD->completeDefinition(); 3472 QualType KmpCmplrdataTy = C.getRecordType(UD); 3473 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3474 RD->startDefinition(); 3475 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3476 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3477 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3478 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3479 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3480 if (isOpenMPTaskLoopDirective(Kind)) { 3481 QualType KmpUInt64Ty = 3482 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3483 QualType KmpInt64Ty = 3484 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3485 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3486 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3487 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3488 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3489 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3490 } 3491 RD->completeDefinition(); 3492 return RD; 3493 } 3494 3495 static RecordDecl * 3496 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3497 ArrayRef<PrivateDataTy> Privates) { 3498 ASTContext &C = CGM.getContext(); 3499 // Build struct kmp_task_t_with_privates { 3500 // kmp_task_t task_data; 3501 // .kmp_privates_t. privates; 3502 // }; 3503 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3504 RD->startDefinition(); 3505 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3506 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3507 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3508 RD->completeDefinition(); 3509 return RD; 3510 } 3511 3512 /// Emit a proxy function which accepts kmp_task_t as the second 3513 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
///
/// The returned function has internal linkage and is named
/// ".omp_task_entry." (via getName). It unpacks the runtime-provided
/// kmp_task_t_with_privates and forwards the pieces to \p TaskFunction.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task-type argument to get at the task descriptor fields.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds record type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the (optional) second field; pass null when there
  // are no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally forward lb/ub/st/liter/reductions from kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime entry point always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk ".omp_task_destructor." with signature
/// kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt). It walks the
/// fields of the privates record and schedules a destroy for each field
/// whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
/// *priv1 = &.privates.priv1;
/// ...;
/// *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: const restrict pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each original VarDecl to its 1-based position in Args; position 0 is
  // the privates-record pointer itself.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  // One T** out-parameter per private variable.
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Likewise for firstprivates.
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Likewise for lastprivates.
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Local privates: lvalue references and allocatable decls are stored as
  // pointers in the privates record, so wrap the type accordingly.
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // The mapping function is trivial; force inlining in optimized builds.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Store the address of each privates-record field through the matching
  // out-parameter (looked up via PrivateVarsPos).
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Pointer to the shareds record of the source task
///        (may be invalid when no firstprivate copy-in is needed).
/// \param TDBase LValue of the destination kmp_task_t_with_privates.
/// \param ForDup true when called from the task duplication routine
///        (taskloop), false for initial task creation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication path only non-trivial constructor initializers must
    // be re-run; trivial copies were already handled at creation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the source task's shareds record.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: run the initializer with the shared
          // element privatized to the source address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates are never initialized here.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial constructor initializer forces a task_dup function.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void (kmp_task_t_with_privates *dst,
  //                  kmp_task_t_with_privates *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied out of the *source* task's shareds record.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
4021 class OMPIteratorGeneratorScope final 4022 : public CodeGenFunction::OMPPrivateScope { 4023 CodeGenFunction &CGF; 4024 const OMPIteratorExpr *E = nullptr; 4025 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4026 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4027 OMPIteratorGeneratorScope() = delete; 4028 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4029 4030 public: 4031 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4032 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4033 if (!E) 4034 return; 4035 SmallVector<llvm::Value *, 4> Uppers; 4036 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4037 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4038 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4039 addPrivate(VD, [&CGF, VD]() { 4040 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4041 }); 4042 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4043 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4044 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4045 "counter.addr"); 4046 }); 4047 } 4048 Privatize(); 4049 4050 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4051 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4052 LValue CLVal = 4053 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4054 HelperData.CounterVD->getType()); 4055 // Counter = 0; 4056 CGF.EmitStoreOfScalar( 4057 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4058 CLVal); 4059 CodeGenFunction::JumpDest &ContDest = 4060 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4061 CodeGenFunction::JumpDest &ExitDest = 4062 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4063 // N = <number-of_iterations>; 4064 llvm::Value *N = Uppers[I]; 4065 // cont: 4066 // if (Counter < N) goto body; else goto exit; 4067 CGF.EmitBlock(ContDest.getBlock()); 4068 auto *CVal = 4069 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison depending on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the address of the data described by \p E together with its size
/// in bytes.  Array-shaping expressions and array sections get their full
/// extent; any other expression is simply sizeof(E->getType()).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * product of all shaping dimensions.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // For an array section: size = (one past the upper bound) - lower bound,
    // in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.  The implicit record mirrors the runtime layout:
/// { intptr_t base_addr; size_t len; uint32_t flags; }.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
const auto *I = Data.PrivateCopies.begin();
  // private(...) variables: no element initializer.
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate(...) variables: carry the per-element initializer as well.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate(...) variables: no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored via a pointer, so only pointer alignment
    // is required for them.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending by alignment minimizes padding in the privates record; stable
  // sort keeps declaration order among equally-aligned entries.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    // Taskloop directives use a dedicated (cached) kmp_task_t layout.
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map callback type is taken from the 4th parameter of the
  // outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Request the destructors callback only if some private actually needs
    // destruction.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final clause: either a runtime select on the condition value, or a
  // compile-time constant flag.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ?
CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                   CGF.Builder.getInt32(FinalFlag),
                                   CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // nowait: allocate through the target-aware entry point.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned void* event handle to the declared handler type
    // and store it in the user variable.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the item count is the product of the iterator
        // upper bounds, known only at run time.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: total = statically-counted items + iterator items;
      // materialize a VLA for the affinity array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time constant count: a fixed-size temporary array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      // Iterator-modified clauses are handled in a second pass below.
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-generated items are appended at run time; keep the running
      // index in memory so the generated loops can read and update it.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Opens the iterator loop nest; the stores below run once per iterator
      // combination.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the in-memory position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
(void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task through its precise kmp_task_t-with-privates
  // type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-dup callback when lastprivates must
    // be tracked or firstprivates require (re)initialization per task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // The remaining kinds never reach runtime dependence emission.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The implicit record mirrors the runtime layout:
/// { intptr_t base_addr; size_t len; unsigned char flags; }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element count of a depobj array is stashed in the base_addr field of
  // the element at index -1.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits one kmp_depend_info record per dependence expression in \p Data into
/// \p DependenciesArray, advancing \p Pos, which is either a compile-time
/// index (unsigned*) or an in-memory counter (LValue*) when the dependences
/// are generated inside an iterator loop nest.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // No-op when there is no iterator expression.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
                         : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Run-time position: load it from the counter temporary.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position, in whichever representation it uses.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Computes the number of kmp_depend_info elements held by each depobj in
/// \p Data; returns one size value per dependence expression, in order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData
&Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // No-op when there is no iterator expression.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count of a depobj array is stashed in the base_addr
      // field of the element at index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized temporary so the value
      // survives past the iterator scope.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal =
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated sizes only after the iterator scope has closed its
  // loops.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the kmp_depend_info records of every depobj in \p Data into
/// \p DependenciesArray at the running position \p PosLVal.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // No-op when there is no iterator expression.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if no clause contributes any dependence expression.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable dependences: regular (non-depobj) ones without an
  // iterator modifier.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum up the (run-time) element counts of all depobj arguments.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at run time: emit a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
KmpDependInfoArrayTy = 4863 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4864 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4865 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4866 // Properly emit variable-sized array. 4867 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4868 ImplicitParamDecl::Other); 4869 CGF.EmitVarDecl(*PD); 4870 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4871 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4872 /*isSigned=*/false); 4873 } else { 4874 KmpDependInfoArrayTy = C.getConstantArrayType( 4875 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4876 ArrayType::Normal, /*IndexTypeQuals=*/0); 4877 DependenciesArray = 4878 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4879 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4880 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4881 /*isSigned=*/false); 4882 } 4883 unsigned Pos = 0; 4884 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4885 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4886 Dependencies[I].IteratorExpr) 4887 continue; 4888 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4889 DependenciesArray); 4890 } 4891 // Copy regular dependecies with iterators. 4892 LValue PosLVal = CGF.MakeAddrLValue( 4893 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4894 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4895 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4896 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4897 !Dependencies[I].IteratorExpr) 4898 continue; 4899 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4900 DependenciesArray); 4901 } 4902 // Copy final depobj arrays without iterators. 
4903 if (HasDepobjDeps) { 4904 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4905 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4906 continue; 4907 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4908 DependenciesArray); 4909 } 4910 } 4911 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4912 DependenciesArray, CGF.VoidPtrTy); 4913 return std::make_pair(NumOfElements, DependenciesArray); 4914 } 4915 4916 Address CGOpenMPRuntime::emitDepobjDependClause( 4917 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4918 SourceLocation Loc) { 4919 if (Dependencies.DepExprs.empty()) 4920 return Address::invalid(); 4921 // Process list of dependencies. 4922 ASTContext &C = CGM.getContext(); 4923 Address DependenciesArray = Address::invalid(); 4924 unsigned NumDependencies = Dependencies.DepExprs.size(); 4925 QualType FlagsTy; 4926 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4927 RecordDecl *KmpDependInfoRD = 4928 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4929 4930 llvm::Value *Size; 4931 // Define type kmp_depend_info[<Dependencies.size()>]; 4932 // For depobj reserve one extra element to store the number of elements. 4933 // It is required to handle depobj(x) update(in) construct. 
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: the element count is the runtime product of all
    // iterator upper bounds; allocation size = (count + 1) records to leave
    // room for the bookkeeping element.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Real elements start at index 1; iterator dependencies need a runtime
  // counter, plain lists can use a compile-time index.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer to the first real element (past the bookkeeping slot).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emits a call to __kmpc_free for 'omp depobj(x) destroy'. The stored depobj
/// pointer points one element past the allocation base (see
/// emitDepobjDependClause), so the base is recovered with a GEP of -1 before
/// freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the allocation base (the bookkeeping element) so the whole
  // block handed out by __kmpc_alloc is freed.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Lowers 'omp depobj(x) update(kind)': walks every kmp_depend_info element
/// of the depobj array and rewrites its 'flags' field to the runtime encoding
/// of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: entry edge from EntryBB, back edge
  // added after the increment below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits code for the 'task' directive: allocates the task via emitTaskInit
/// and then either schedules it through __kmpc_omp_task[_with_deps] or, under
/// a false 'if' clause, waits on the dependencies and runs the task entry
/// inline between __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependency list is emitted: ndeps_noalias = 0, list = null.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits a call to __kmpc_taskloop for the 'taskloop' directive, initializing
/// the lower-bound, upper-bound and stride fields of the task descriptor from
/// the directive's loop-bound variables and passing the schedule
/// (grainsize/num_tasks) and task-duplication callback.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower-bound field from the directive's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound...
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // ...and the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely when
  // begin == end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Element pointers are carried through PHIs; back edges are wired up after
  // the increment at the bottom of the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily rebind LHSVar/RHSVar to the current elements so RedOpGen
  // operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A user-defined reduction shows up as a call through an OpaqueValueExpr
  // callee that refers to an OMPDeclareReductionDecl; map the opaque callee
  // to the emitted combiner function before emitting the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal 'void reduction_func(void *lhs, void *rhs)' that
/// combines each element of the RHS pointer array into the corresponding LHS
/// element by applying the matching entry of \p ReductionOps. Extra array
/// slots after a variably-modified private carry the VLA size.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Rebind each LHS/RHS variable to its slot in the pointer arrays; Idx can
  // run ahead of I because VLA sizes occupy extra slots.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits one reduction combination: element-by-element via
/// EmitOMPAggregateReduction when the private is an array type, otherwise a
/// single emitReductionCombiner call.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
5476 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5477 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5478 EmitOMPAggregateReduction( 5479 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5480 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5481 emitReductionCombiner(CGF, ReductionOp); 5482 }); 5483 } else { 5484 // Emit reduction for array subscript or single variable. 5485 emitReductionCombiner(CGF, ReductionOp); 5486 } 5487 } 5488 5489 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5490 ArrayRef<const Expr *> Privates, 5491 ArrayRef<const Expr *> LHSExprs, 5492 ArrayRef<const Expr *> RHSExprs, 5493 ArrayRef<const Expr *> ReductionOps, 5494 ReductionOptionsTy Options) { 5495 if (!CGF.HaveInsertPoint()) 5496 return; 5497 5498 bool WithNowait = Options.WithNowait; 5499 bool SimpleReduction = Options.SimpleReduction; 5500 5501 // Next code should be emitted for reduction: 5502 // 5503 // static kmp_critical_name lock = { 0 }; 5504 // 5505 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5506 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5507 // ... 5508 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5509 // *(Type<n>-1*)rhs[<n>-1]); 5510 // } 5511 // 5512 // ... 5513 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5514 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5515 // RedList, reduce_func, &<lock>)) { 5516 // case 1: 5517 // ... 5518 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5519 // ... 5520 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5521 // break; 5522 // case 2: 5523 // ... 5524 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5525 // ... 5526 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5527 // break; 5528 // default:; 5529 // } 5530 // 5531 // if SimpleReduction is true, only the next code is generated: 5532 // ... 
5533 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5534 // ... 5535 5536 ASTContext &C = CGM.getContext(); 5537 5538 if (SimpleReduction) { 5539 CodeGenFunction::RunCleanupsScope Scope(CGF); 5540 auto IPriv = Privates.begin(); 5541 auto ILHS = LHSExprs.begin(); 5542 auto IRHS = RHSExprs.begin(); 5543 for (const Expr *E : ReductionOps) { 5544 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5545 cast<DeclRefExpr>(*IRHS)); 5546 ++IPriv; 5547 ++ILHS; 5548 ++IRHS; 5549 } 5550 return; 5551 } 5552 5553 // 1. Build a list of reduction variables. 5554 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5555 auto Size = RHSExprs.size(); 5556 for (const Expr *E : Privates) { 5557 if (E->getType()->isVariablyModifiedType()) 5558 // Reserve place for array size. 5559 ++Size; 5560 } 5561 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5562 QualType ReductionArrayTy = 5563 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5564 /*IndexTypeQuals=*/0); 5565 Address ReductionList = 5566 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5567 auto IPriv = Privates.begin(); 5568 unsigned Idx = 0; 5569 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5570 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5571 CGF.Builder.CreateStore( 5572 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5573 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5574 Elem); 5575 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5576 // Store array size. 5577 ++Idx; 5578 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5579 llvm::Value *Size = CGF.Builder.CreateIntCast( 5580 CGF.getVLASize( 5581 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5582 .NumElts, 5583 CGF.SizeTy, /*isSigned=*/false); 5584 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5585 Elem); 5586 } 5587 } 5588 5589 // 2. 
Emit reduce_func(). 5590 llvm::Function *ReductionFn = emitReductionFunction( 5591 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5592 LHSExprs, RHSExprs, ReductionOps); 5593 5594 // 3. Create static kmp_critical_name lock = { 0 }; 5595 std::string Name = getName({"reduction"}); 5596 llvm::Value *Lock = getCriticalRegionLock(Name); 5597 5598 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5599 // RedList, reduce_func, &<lock>); 5600 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5601 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5602 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5603 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5604 ReductionList.getPointer(), CGF.VoidPtrTy); 5605 llvm::Value *Args[] = { 5606 IdentTLoc, // ident_t *<loc> 5607 ThreadId, // i32 <gtid> 5608 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5609 ReductionArrayTySize, // size_type sizeof(RedList) 5610 RL, // void *RedList 5611 ReductionFn, // void (*) (void *, void *) <reduce_func> 5612 Lock // kmp_critical_name *&<lock> 5613 }; 5614 llvm::Value *Res = CGF.EmitRuntimeCall( 5615 OMPBuilder.getOrCreateRuntimeFunction( 5616 CGM.getModule(), 5617 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5618 Args); 5619 5620 // 5. Build switch(res) 5621 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5622 llvm::SwitchInst *SwInst = 5623 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5624 5625 // 6. Build case 1: 5626 // ... 5627 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5628 // ... 
5629 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5630 // break; 5631 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5632 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5633 CGF.EmitBlock(Case1BB); 5634 5635 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5636 llvm::Value *EndArgs[] = { 5637 IdentTLoc, // ident_t *<loc> 5638 ThreadId, // i32 <gtid> 5639 Lock // kmp_critical_name *&<lock> 5640 }; 5641 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5642 CodeGenFunction &CGF, PrePostActionTy &Action) { 5643 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5644 auto IPriv = Privates.begin(); 5645 auto ILHS = LHSExprs.begin(); 5646 auto IRHS = RHSExprs.begin(); 5647 for (const Expr *E : ReductionOps) { 5648 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5649 cast<DeclRefExpr>(*IRHS)); 5650 ++IPriv; 5651 ++ILHS; 5652 ++IRHS; 5653 } 5654 }; 5655 RegionCodeGenTy RCG(CodeGen); 5656 CommonActionTy Action( 5657 nullptr, llvm::None, 5658 OMPBuilder.getOrCreateRuntimeFunction( 5659 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5660 : OMPRTL___kmpc_end_reduce), 5661 EndArgs); 5662 RCG.setAction(Action); 5663 RCG(CGF); 5664 5665 CGF.EmitBranch(DefaultBB); 5666 5667 // 7. Build case 2: 5668 // ... 5669 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5670 // ... 
5671 // break; 5672 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5673 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5674 CGF.EmitBlock(Case2BB); 5675 5676 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5677 CodeGenFunction &CGF, PrePostActionTy &Action) { 5678 auto ILHS = LHSExprs.begin(); 5679 auto IRHS = RHSExprs.begin(); 5680 auto IPriv = Privates.begin(); 5681 for (const Expr *E : ReductionOps) { 5682 const Expr *XExpr = nullptr; 5683 const Expr *EExpr = nullptr; 5684 const Expr *UpExpr = nullptr; 5685 BinaryOperatorKind BO = BO_Comma; 5686 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5687 if (BO->getOpcode() == BO_Assign) { 5688 XExpr = BO->getLHS(); 5689 UpExpr = BO->getRHS(); 5690 } 5691 } 5692 // Try to emit update expression as a simple atomic. 5693 const Expr *RHSExpr = UpExpr; 5694 if (RHSExpr) { 5695 // Analyze RHS part of the whole expression. 5696 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5697 RHSExpr->IgnoreParenImpCasts())) { 5698 // If this is a conditional operator, analyze its condition for 5699 // min/max reduction operator. 
5700 RHSExpr = ACO->getCond(); 5701 } 5702 if (const auto *BORHS = 5703 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5704 EExpr = BORHS->getRHS(); 5705 BO = BORHS->getOpcode(); 5706 } 5707 } 5708 if (XExpr) { 5709 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5710 auto &&AtomicRedGen = [BO, VD, 5711 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5712 const Expr *EExpr, const Expr *UpExpr) { 5713 LValue X = CGF.EmitLValue(XExpr); 5714 RValue E; 5715 if (EExpr) 5716 E = CGF.EmitAnyExpr(EExpr); 5717 CGF.EmitOMPAtomicSimpleUpdateExpr( 5718 X, E, BO, /*IsXLHSInRHSPart=*/true, 5719 llvm::AtomicOrdering::Monotonic, Loc, 5720 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5721 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5722 PrivateScope.addPrivate( 5723 VD, [&CGF, VD, XRValue, Loc]() { 5724 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5725 CGF.emitOMPSimpleStore( 5726 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5727 VD->getType().getNonReferenceType(), Loc); 5728 return LHSTemp; 5729 }); 5730 (void)PrivateScope.Privatize(); 5731 return CGF.EmitAnyExpr(UpExpr); 5732 }); 5733 }; 5734 if ((*IPriv)->getType()->isArrayType()) { 5735 // Emit atomic reduction for array section. 5736 const auto *RHSVar = 5737 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5738 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5739 AtomicRedGen, XExpr, EExpr, UpExpr); 5740 } else { 5741 // Emit atomic reduction for array subscript or single variable. 5742 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5743 } 5744 } else { 5745 // Emit as a critical region. 
5746 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5747 const Expr *, const Expr *) { 5748 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5749 std::string Name = RT.getName({"atomic_reduction"}); 5750 RT.emitCriticalRegion( 5751 CGF, Name, 5752 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5753 Action.Enter(CGF); 5754 emitReductionCombiner(CGF, E); 5755 }, 5756 Loc); 5757 }; 5758 if ((*IPriv)->getType()->isArrayType()) { 5759 const auto *LHSVar = 5760 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5761 const auto *RHSVar = 5762 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5763 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5764 CritRedGen); 5765 } else { 5766 CritRedGen(CGF, nullptr, nullptr, nullptr); 5767 } 5768 } 5769 ++ILHS; 5770 ++IRHS; 5771 ++IPriv; 5772 } 5773 }; 5774 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5775 if (!WithNowait) { 5776 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5777 llvm::Value *EndArgs[] = { 5778 IdentTLoc, // ident_t *<loc> 5779 ThreadId, // i32 <gtid> 5780 Lock // kmp_critical_name *&<lock> 5781 }; 5782 CommonActionTy Action(nullptr, llvm::None, 5783 OMPBuilder.getOrCreateRuntimeFunction( 5784 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5785 EndArgs); 5786 AtomicRCG.setAction(Action); 5787 AtomicRCG(CGF); 5788 } else { 5789 AtomicRCG(CGF); 5790 } 5791 5792 CGF.EmitBranch(DefaultBB); 5793 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5794 } 5795 5796 /// Generates unique name for artificial threadprivate variables. 5797 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// \param Prefix Text prepended to the generated name (e.g. "reduction_size").
/// \param Ref Reference expression naming the variable the artificial
/// threadprivate variable is associated with.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    // Not a base-decl expression; Ref itself must be a direct DeclRefExpr.
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/params use the plain name; globals use the mangled name so the
  // generated symbol is unique across translation units.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // Append the raw-encoded start location to disambiguate same-named decls.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to be initialized.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Original item is not needed; pass a null pointer lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
///
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS placeholder variables from the reduction clause; they are
  // remapped below onto the function's void* arguments.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; //
  // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // No finalizer is emitted when the item needs no cleanups; store null.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (delayed) creation in the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second holds the runtime size value in that case).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // Stash the runtime size in a threadprivate variable so the generated
    // init/comb/fini functions can retrieve it (they have no size parameter).
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned pointer inherits the alignment of the shared item.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate to the OpenMPIRBuilder when it is enabled.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the region body inline, under an inlined-region RAII scope.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants understood by the KMP runtime's cancel entry
/// points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's cancel kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted when the cancel actually fires (possibly guarded by the
    // 'if' clause condition below).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition is true.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator, allocator-traits) pairs from the uses_allocators clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // On region entry, initialize each user-defined allocator.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // On region exit, destroy the allocators created in Enter().
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  // Collect allocators from uses_allocators clauses; only those with traits
  // need init/fini actions.
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
6405 CGF.EmitVarDecl(*cast<VarDecl>( 6406 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6407 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6408 AllocatorVal = 6409 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6410 Allocator->getType(), Allocator->getExprLoc()); 6411 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6412 } 6413 6414 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6415 const Expr *Allocator) { 6416 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6417 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6418 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6419 llvm::Value *AllocatorVal = 6420 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6421 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6422 CGF.getContext().VoidPtrTy, 6423 Allocator->getExprLoc()); 6424 (void)CGF.EmitRuntimeCall( 6425 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6426 OMPRTL___kmpc_destroy_allocator), 6427 {ThreadId, AllocatorVal}); 6428 } 6429 6430 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6431 const OMPExecutableDirective &D, StringRef ParentName, 6432 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6433 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6434 // Create a unique name for the entry function using the source location 6435 // information of the current target region. The name will be something like: 6436 // 6437 // __omp_offloading_DD_FFFF_PP_lBB 6438 // 6439 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6440 // mangled name of the function that encloses the target region and BB is the 6441 // line number of the target region. 

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  // Derive the unique (device id, file id, line) triple for this location.
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name computed
  // above, using a dedicated CodeGenFunction and region info.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the outlined function itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a unique, otherwise meaningless global byte.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Peel container statements, then repeatedly descend into compound
  // statements looking for exactly one "interesting" child.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (no side effects, no non-trivial calls) do not
      // count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable only if every declaration in
        // it has no codegen effect: empty/type/pragma/using/OpenMP-metadata
        // declarations, or variables that are constexpr or trivial with a
        // trivial (or absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep descending through nested containers around the single child.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the single nested directive (if any) to decide
    // the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams with num_teams: emit the clause expression in the
        // inner-expression capture context.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: let the runtime choose (0).
        return Bld.getInt32(0);
      }
      // Nested parallel/simd (no teams): a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be determined.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: num_teams clause (if any) is on D
    // itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Derive the number of threads from a parallel region nested directly in
/// the captured statement \p CS, combining any 'if' and 'num_threads'
/// clauses with \p DefaultThreadLimitVal. Returns 0 (i.e. "let the runtime
/// decide") when nothing more precise can be computed.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause with no name modifier or the 'parallel'
        // modifier applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the clause captured.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the default thread limit, if one was given.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': try a directly nested parallel region first, then
    // inspect the nested directive for thread_limit/distribute/simd.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause captured.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more level.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested 'distribute' may itself contain a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' clause with no name modifier or the 'parallel'
      // modifier applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serialized parallel, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
7042 OMP_MAP_PRIVATE = 0x80, 7043 /// Pass the element to the device by value. 7044 OMP_MAP_LITERAL = 0x100, 7045 /// Implicit map 7046 OMP_MAP_IMPLICIT = 0x200, 7047 /// Close is a hint to the runtime to allocate memory close to 7048 /// the target device. 7049 OMP_MAP_CLOSE = 0x400, 7050 /// 0x800 is reserved for compatibility with XLC. 7051 /// Produce a runtime error if the data is not already allocated. 7052 OMP_MAP_PRESENT = 0x1000, 7053 /// Signal that the runtime library should use args as an array of 7054 /// descriptor_dim pointers and use args_size as dims. Used when we have 7055 /// non-contiguous list items in target update directive 7056 OMP_MAP_NON_CONTIG = 0x100000000000, 7057 /// The 16 MSBs of the flags indicate whether the entry is member of some 7058 /// struct/class. 7059 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7060 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7061 }; 7062 7063 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7064 static unsigned getFlagMemberOffset() { 7065 unsigned Offset = 0; 7066 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7067 Remain = Remain >> 1) 7068 Offset++; 7069 return Offset; 7070 } 7071 7072 /// Class that holds debugging information for a data mapping to be passed to 7073 /// the runtime library. 7074 class MappingExprInfo { 7075 /// The variable declaration used for the data mapping. 7076 const ValueDecl *MapDecl = nullptr; 7077 /// The original expression used in the map clause, or null if there is 7078 /// none. 7079 const Expr *MapExpr = nullptr; 7080 7081 public: 7082 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7083 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7084 7085 const ValueDecl *getMapDecl() const { return MapDecl; } 7086 const Expr *getMapExpr() const { return MapExpr; } 7087 }; 7088 7089 /// Class that associates information with a base pointer to be passed to the 7090 /// runtime library. 
7091 class BasePointerInfo { 7092 /// The base pointer. 7093 llvm::Value *Ptr = nullptr; 7094 /// The base declaration that refers to this device pointer, or null if 7095 /// there is none. 7096 const ValueDecl *DevPtrDecl = nullptr; 7097 7098 public: 7099 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7100 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7101 llvm::Value *operator*() const { return Ptr; } 7102 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7103 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7104 }; 7105 7106 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7107 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7108 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7109 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7110 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7111 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7112 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7113 7114 /// This structure contains combined information generated for mappable 7115 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7116 /// mappers, and non-contiguous information. 7117 struct MapCombinedInfoTy { 7118 struct StructNonContiguousInfo { 7119 bool IsNonContiguous = false; 7120 MapDimArrayTy Dims; 7121 MapNonContiguousArrayTy Offsets; 7122 MapNonContiguousArrayTy Counts; 7123 MapNonContiguousArrayTy Strides; 7124 }; 7125 MapExprsArrayTy Exprs; 7126 MapBaseValuesArrayTy BasePointers; 7127 MapValuesArrayTy Pointers; 7128 MapValuesArrayTy Sizes; 7129 MapFlagsArrayTy Types; 7130 MapMappersArrayTy Mappers; 7131 StructNonContiguousInfo NonContigInfo; 7132 7133 /// Append arrays in \a CurInfo. 
7134 void append(MapCombinedInfoTy &CurInfo) { 7135 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7136 BasePointers.append(CurInfo.BasePointers.begin(), 7137 CurInfo.BasePointers.end()); 7138 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7139 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7140 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7141 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7142 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7143 CurInfo.NonContigInfo.Dims.end()); 7144 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7145 CurInfo.NonContigInfo.Offsets.end()); 7146 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7147 CurInfo.NonContigInfo.Counts.end()); 7148 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7149 CurInfo.NonContigInfo.Strides.end()); 7150 } 7151 }; 7152 7153 /// Map between a struct and the its lowest & highest elements which have been 7154 /// mapped. 7155 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7156 /// HE(FieldIndex, Pointer)} 7157 struct StructRangeInfoTy { 7158 MapCombinedInfoTy PreliminaryMapData; 7159 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7160 0, Address::invalid()}; 7161 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7162 0, Address::invalid()}; 7163 Address Base = Address::invalid(); 7164 Address LB = Address::invalid(); 7165 bool IsArraySection = false; 7166 bool HasCompleteRecord = false; 7167 }; 7168 7169 private: 7170 /// Kind that defines how a device pointer has to be returned. 
7171 struct MapInfo { 7172 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7173 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7174 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7175 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7176 bool ReturnDevicePointer = false; 7177 bool IsImplicit = false; 7178 const ValueDecl *Mapper = nullptr; 7179 const Expr *VarRef = nullptr; 7180 bool ForDeviceAddr = false; 7181 7182 MapInfo() = default; 7183 MapInfo( 7184 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7185 OpenMPMapClauseKind MapType, 7186 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7187 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7188 bool ReturnDevicePointer, bool IsImplicit, 7189 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7190 bool ForDeviceAddr = false) 7191 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7192 MotionModifiers(MotionModifiers), 7193 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7194 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7195 }; 7196 7197 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7198 /// member and there is no map information about it, then emission of that 7199 /// entry is deferred until the whole struct has been processed. 7200 struct DeferredDevicePtrEntryTy { 7201 const Expr *IE = nullptr; 7202 const ValueDecl *VD = nullptr; 7203 bool ForDeviceAddr = false; 7204 7205 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7206 bool ForDeviceAddr) 7207 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7208 }; 7209 7210 /// The target directive from where the mappable clauses were extracted. It 7211 /// is either a executable directive or a user-defined mapper directive. 7212 llvm::PointerUnion<const OMPExecutableDirective *, 7213 const OMPDeclareMapperDecl *> 7214 CurDir; 7215 7216 /// Function the directive is being generated for. 
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the number of bytes that mapping \p E covers, as an i64-ish
  /// runtime value. Array shaping expressions and array sections are sized
  /// from their dimensions/bounds rather than from the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    // Size = sizeof(pointee) * dim0 * dim1 * ... (each dimension converted
    // to size_t first).
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      // Clamped to zero via select so the NUW subtraction never wraps when
      // the lower bound reaches past the end of the base.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier.
Add 7307 /// a flag marking the map as a pointer if requested. Add a flag marking the 7308 /// map as the first one of a series of maps that relate to the same map 7309 /// expression. 7310 OpenMPOffloadMappingFlags getMapTypeBits( 7311 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7312 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7313 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7314 OpenMPOffloadMappingFlags Bits = 7315 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7316 switch (MapType) { 7317 case OMPC_MAP_alloc: 7318 case OMPC_MAP_release: 7319 // alloc and release is the default behavior in the runtime library, i.e. 7320 // if we don't pass any bits alloc/release that is what the runtime is 7321 // going to do. Therefore, we don't need to signal anything for these two 7322 // type modifiers. 7323 break; 7324 case OMPC_MAP_to: 7325 Bits |= OMP_MAP_TO; 7326 break; 7327 case OMPC_MAP_from: 7328 Bits |= OMP_MAP_FROM; 7329 break; 7330 case OMPC_MAP_tofrom: 7331 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7332 break; 7333 case OMPC_MAP_delete: 7334 Bits |= OMP_MAP_DELETE; 7335 break; 7336 case OMPC_MAP_unknown: 7337 llvm_unreachable("Unexpected map type!"); 7338 } 7339 if (AddPtrFlag) 7340 Bits |= OMP_MAP_PTR_AND_OBJ; 7341 if (AddIsTargetParamFlag) 7342 Bits |= OMP_MAP_TARGET_PARAM; 7343 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7344 != MapModifiers.end()) 7345 Bits |= OMP_MAP_ALWAYS; 7346 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7347 != MapModifiers.end()) 7348 Bits |= OMP_MAP_CLOSE; 7349 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7350 MapModifiers.end() || 7351 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7352 MotionModifiers.end()) 7353 Bits |= OMP_MAP_PRESENT; 7354 if (IsNonContiguous) 7355 Bits |= OMP_MAP_NON_CONTIG; 7356 return Bits; 7357 } 7358 7359 /// Return true if the provided expression is a final array section. 
A 7360 /// final array section, is one whose length can't be proved to be one. 7361 bool isFinalArraySectionExpression(const Expr *E) const { 7362 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7363 7364 // It is not an array section and therefore not a unity-size one. 7365 if (!OASE) 7366 return false; 7367 7368 // An array section with no colon always refer to a single element. 7369 if (OASE->getColonLocFirst().isInvalid()) 7370 return false; 7371 7372 const Expr *Length = OASE->getLength(); 7373 7374 // If we don't have a length we have to check if the array has size 1 7375 // for this dimension. Also, we should always expect a length if the 7376 // base type is pointer. 7377 if (!Length) { 7378 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7379 OASE->getBase()->IgnoreParenImpCasts()) 7380 .getCanonicalType(); 7381 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7382 return ATy->getSize().getSExtValue() != 1; 7383 // If we don't have a constant dimension length, we have to consider 7384 // the current section as having any size, so it is not necessarily 7385 // unitary. If it happen to be unity size, that's user fault. 7386 return true; 7387 } 7388 7389 // Check if the length evaluates to 1. 7390 Expr::EvalResult Result; 7391 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7392 return true; // Can have more that size 1. 7393 7394 llvm::APSInt ConstLength = Result.Val.getInt(); 7395 return ConstLength.getSExtValue() != 1; 7396 } 7397 7398 /// Generate the base pointers, section pointers, sizes, map type bits, and 7399 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7400 /// map type, map or motion modifiers, and expression components. 7401 /// \a IsFirstComponent should be set to true if the provided set of 7402 /// components is the first associated with a capture. 
7403 void generateInfoForComponentList( 7404 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7405 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7406 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7407 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7408 bool IsFirstComponentList, bool IsImplicit, 7409 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7410 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7411 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7412 OverlappedElements = llvm::None) const { 7413 // The following summarizes what has to be generated for each map and the 7414 // types below. The generated information is expressed in this order: 7415 // base pointer, section pointer, size, flags 7416 // (to add to the ones that come from the map type and modifier). 7417 // 7418 // double d; 7419 // int i[100]; 7420 // float *p; 7421 // 7422 // struct S1 { 7423 // int i; 7424 // float f[50]; 7425 // } 7426 // struct S2 { 7427 // int i; 7428 // float f[50]; 7429 // S1 s; 7430 // double *p; 7431 // struct S2 *ps; 7432 // } 7433 // S2 s; 7434 // S2 *ps; 7435 // 7436 // map(d) 7437 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7438 // 7439 // map(i) 7440 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7441 // 7442 // map(i[1:23]) 7443 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7444 // 7445 // map(p) 7446 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7447 // 7448 // map(p[1:24]) 7449 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7450 // in unified shared memory mode or for local pointers 7451 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7452 // 7453 // map(s) 7454 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7455 // 7456 // map(s.i) 7457 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7458 // 7459 // map(s.s.f) 7460 // &s, &(s.s.f[0]), 50*sizeof(float), 
TARGET_PARAM | TO | FROM 7461 // 7462 // map(s.p) 7463 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7464 // 7465 // map(to: s.p[:22]) 7466 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7467 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7468 // &(s.p), &(s.p[0]), 22*sizeof(double), 7469 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7470 // (*) alloc space for struct members, only this is a target parameter 7471 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7472 // optimizes this entry out, same in the examples below) 7473 // (***) map the pointee (map: to) 7474 // 7475 // map(s.ps) 7476 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7477 // 7478 // map(from: s.ps->s.i) 7479 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7480 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7481 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7482 // 7483 // map(to: s.ps->ps) 7484 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7485 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7486 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7487 // 7488 // map(s.ps->ps->ps) 7489 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7490 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7491 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7492 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7493 // 7494 // map(to: s.ps->ps->s.f[:22]) 7495 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7496 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7497 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7498 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7499 // 7500 // map(ps) 7501 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7502 // 7503 // map(ps->i) 7504 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7505 // 7506 // map(ps->s.f) 7507 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7508 // 7509 // map(from: ps->p) 7510 // ps, &(ps->p), sizeof(double*), 
TARGET_PARAM | FROM 7511 // 7512 // map(to: ps->p[:22]) 7513 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7514 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7515 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7516 // 7517 // map(ps->ps) 7518 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7519 // 7520 // map(from: ps->ps->s.i) 7521 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7522 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7523 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7524 // 7525 // map(from: ps->ps->ps) 7526 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7527 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7528 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7529 // 7530 // map(ps->ps->ps->ps) 7531 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7532 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7533 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7534 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7535 // 7536 // map(to: ps->ps->ps->s.f[:22]) 7537 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7538 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7539 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7540 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7541 // 7542 // map(to: s.f[:22]) map(from: s.p[:33]) 7543 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7544 // sizeof(double*) (**), TARGET_PARAM 7545 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7546 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7547 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7548 // (*) allocate contiguous space needed to fit all mapped members even if 7549 // we allocate space for members not mapped (in this example, 7550 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7551 // them as well because they fall between &s.f[0] and &s.p) 7552 // 7553 
// map(from: s.f[:22]) map(to: ps->p[:33]) 7554 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7555 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7556 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7557 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7558 // (*) the struct this entry pertains to is the 2nd element in the list of 7559 // arguments, hence MEMBER_OF(2) 7560 // 7561 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7562 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7563 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7564 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7565 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7566 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7567 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7568 // (*) the struct this entry pertains to is the 4th element in the list 7569 // of arguments, hence MEMBER_OF(4) 7570 7571 // Track if the map information being generated is the first for a capture. 7572 bool IsCaptureFirstInfo = IsFirstComponentList; 7573 // When the variable is on a declare target link or in a to clause with 7574 // unified memory, a reference is needed to hold the host/device address 7575 // of the variable. 7576 bool RequiresReference = false; 7577 7578 // Scan the components from the base to the complete expression. 7579 auto CI = Components.rbegin(); 7580 auto CE = Components.rend(); 7581 auto I = CI; 7582 7583 // Track if the map information being generated is the first for a list of 7584 // components. 
7585 bool IsExpressionFirstInfo = true; 7586 bool FirstPointerInComplexData = false; 7587 Address BP = Address::invalid(); 7588 const Expr *AssocExpr = I->getAssociatedExpression(); 7589 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7590 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7591 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7592 7593 if (isa<MemberExpr>(AssocExpr)) { 7594 // The base is the 'this' pointer. The content of the pointer is going 7595 // to be the base of the field being mapped. 7596 BP = CGF.LoadCXXThisAddress(); 7597 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7598 (OASE && 7599 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7600 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7601 } else if (OAShE && 7602 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7603 BP = Address( 7604 CGF.EmitScalarExpr(OAShE->getBase()), 7605 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7606 } else { 7607 // The base is the reference to the variable. 7608 // BP = &Var. 7609 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7610 if (const auto *VD = 7611 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7612 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7613 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7614 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7615 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7616 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7617 RequiresReference = true; 7618 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7619 } 7620 } 7621 } 7622 7623 // If the variable is a pointer and is being dereferenced (i.e. is not 7624 // the last component), the base has to be the pointer itself, not its 7625 // reference. References are ignored for mapping purposes. 
7626 QualType Ty = 7627 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7628 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7629 // No need to generate individual map information for the pointer, it 7630 // can be associated with the combined storage if shared memory mode is 7631 // active or the base declaration is not global variable. 7632 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7633 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7634 !VD || VD->hasLocalStorage()) 7635 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7636 else 7637 FirstPointerInComplexData = true; 7638 ++I; 7639 } 7640 } 7641 7642 // Track whether a component of the list should be marked as MEMBER_OF some 7643 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7644 // in a component list should be marked as MEMBER_OF, all subsequent entries 7645 // do not belong to the base struct. E.g. 7646 // struct S2 s; 7647 // s.ps->ps->ps->f[:] 7648 // (1) (2) (3) (4) 7649 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7650 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7651 // is the pointee of ps(2) which is not member of struct s, so it should not 7652 // be marked as such (it is still PTR_AND_OBJ). 7653 // The variable is initialized to false so that PTR_AND_OBJ entries which 7654 // are not struct members are not considered (e.g. array of pointers to 7655 // data). 7656 bool ShouldBeMemberOf = false; 7657 7658 // Variable keeping track of whether or not we have encountered a component 7659 // in the component list which is a member expression. Useful when we have a 7660 // pointer or a final array section, in which case it is the previous 7661 // component in the list which tells us whether we have a member expression. 7662 // E.g. 
X.f[:] 7663 // While processing the final array section "[:]" it is "f" which tells us 7664 // whether we are dealing with a member of a declared struct. 7665 const MemberExpr *EncounteredME = nullptr; 7666 7667 // Track for the total number of dimension. Start from one for the dummy 7668 // dimension. 7669 uint64_t DimSize = 1; 7670 7671 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7672 7673 for (; I != CE; ++I) { 7674 // If the current component is member of a struct (parent struct) mark it. 7675 if (!EncounteredME) { 7676 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7677 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7678 // as MEMBER_OF the parent struct. 7679 if (EncounteredME) { 7680 ShouldBeMemberOf = true; 7681 // Do not emit as complex pointer if this is actually not array-like 7682 // expression. 7683 if (FirstPointerInComplexData) { 7684 QualType Ty = std::prev(I) 7685 ->getAssociatedDeclaration() 7686 ->getType() 7687 .getNonReferenceType(); 7688 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7689 FirstPointerInComplexData = false; 7690 } 7691 } 7692 } 7693 7694 auto Next = std::next(I); 7695 7696 // We need to generate the addresses and sizes if this is the last 7697 // component, if the component is a pointer or if it is an array section 7698 // whose length can't be proved to be one. If this is a pointer, it 7699 // becomes the base address for the following components. 7700 7701 // A final array section, is one whose length can't be proved to be one. 7702 // If the map item is non-contiguous then we don't treat any array section 7703 // as final array section. 7704 bool IsFinalArraySection = 7705 !IsNonContiguous && 7706 isFinalArraySectionExpression(I->getAssociatedExpression()); 7707 7708 // If we have a declaration for the mapping use that, otherwise use 7709 // the base declaration of the map clause. 
7710 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7711 ? I->getAssociatedDeclaration() 7712 : BaseDecl; 7713 7714 // Get information on whether the element is a pointer. Have to do a 7715 // special treatment for array sections given that they are built-in 7716 // types. 7717 const auto *OASE = 7718 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7719 const auto *OAShE = 7720 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7721 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7722 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7723 bool IsPointer = 7724 OAShE || 7725 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7726 .getCanonicalType() 7727 ->isAnyPointerType()) || 7728 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7729 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7730 7731 if (OASE) 7732 ++DimSize; 7733 7734 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7735 // If this is not the last component, we expect the pointer to be 7736 // associated with an array expression or member expression. 
7737 assert((Next == CE || 7738 isa<MemberExpr>(Next->getAssociatedExpression()) || 7739 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7740 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7741 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7742 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7743 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7744 "Unexpected expression"); 7745 7746 Address LB = Address::invalid(); 7747 if (OAShE) { 7748 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7749 CGF.getContext().getTypeAlignInChars( 7750 OAShE->getBase()->getType())); 7751 } else { 7752 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7753 .getAddress(CGF); 7754 } 7755 7756 // If this component is a pointer inside the base struct then we don't 7757 // need to create any entry for it - it will be combined with the object 7758 // it is pointing to into a single PTR_AND_OBJ entry. 7759 bool IsMemberPointerOrAddr = 7760 (IsPointer || ForDeviceAddr) && EncounteredME && 7761 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7762 EncounteredME); 7763 if (!OverlappedElements.empty() && Next == CE) { 7764 // Handle base element with the info for overlapped elements. 7765 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7766 assert(!IsPointer && 7767 "Unexpected base element with the pointer type."); 7768 // Mark the whole struct as the struct that requires allocation on the 7769 // device. 
7770 PartialStruct.LowestElem = {0, LB}; 7771 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7772 I->getAssociatedExpression()->getType()); 7773 Address HB = CGF.Builder.CreateConstGEP( 7774 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7775 CGF.VoidPtrTy), 7776 TypeSize.getQuantity() - 1); 7777 PartialStruct.HighestElem = { 7778 std::numeric_limits<decltype( 7779 PartialStruct.HighestElem.first)>::max(), 7780 HB}; 7781 PartialStruct.Base = BP; 7782 PartialStruct.LB = LB; 7783 assert( 7784 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7785 "Overlapped elements must be used only once for the variable."); 7786 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7787 // Emit data for non-overlapped data. 7788 OpenMPOffloadMappingFlags Flags = 7789 OMP_MAP_MEMBER_OF | 7790 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7791 /*AddPtrFlag=*/false, 7792 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7793 llvm::Value *Size = nullptr; 7794 // Do bitcopy of all non-overlapped structure elements. 
7795 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7796 Component : OverlappedElements) { 7797 Address ComponentLB = Address::invalid(); 7798 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7799 Component) { 7800 if (MC.getAssociatedDeclaration()) { 7801 ComponentLB = 7802 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7803 .getAddress(CGF); 7804 Size = CGF.Builder.CreatePtrDiff( 7805 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7806 CGF.EmitCastToVoidPtr(LB.getPointer())); 7807 break; 7808 } 7809 } 7810 assert(Size && "Failed to determine structure size"); 7811 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7812 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7813 CombinedInfo.Pointers.push_back(LB.getPointer()); 7814 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7815 Size, CGF.Int64Ty, /*isSigned=*/true)); 7816 CombinedInfo.Types.push_back(Flags); 7817 CombinedInfo.Mappers.push_back(nullptr); 7818 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7819 : 1); 7820 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7821 } 7822 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7823 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7824 CombinedInfo.Pointers.push_back(LB.getPointer()); 7825 Size = CGF.Builder.CreatePtrDiff( 7826 CGF.EmitCastToVoidPtr( 7827 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7828 CGF.EmitCastToVoidPtr(LB.getPointer())); 7829 CombinedInfo.Sizes.push_back( 7830 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7831 CombinedInfo.Types.push_back(Flags); 7832 CombinedInfo.Mappers.push_back(nullptr); 7833 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7834 : 1); 7835 break; 7836 } 7837 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7838 if (!IsMemberPointerOrAddr || 7839 (Next == CE && MapType != OMPC_MAP_unknown)) { 7840 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7841 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7842 CombinedInfo.Pointers.push_back(LB.getPointer()); 7843 CombinedInfo.Sizes.push_back( 7844 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7845 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7846 : 1); 7847 7848 // If Mapper is valid, the last component inherits the mapper. 7849 bool HasMapper = Mapper && Next == CE; 7850 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7851 7852 // We need to add a pointer flag for each map that comes from the 7853 // same expression except for the first one. We also need to signal 7854 // this map is the first one that relates with the current capture 7855 // (there is a set of entries for each capture). 7856 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7857 MapType, MapModifiers, MotionModifiers, IsImplicit, 7858 !IsExpressionFirstInfo || RequiresReference || 7859 FirstPointerInComplexData, 7860 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7861 7862 if (!IsExpressionFirstInfo) { 7863 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7864 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7865 if (IsPointer) 7866 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7867 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7868 7869 if (ShouldBeMemberOf) { 7870 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7871 // should be later updated with the correct value of MEMBER_OF. 7872 Flags |= OMP_MAP_MEMBER_OF; 7873 // From now on, all subsequent PTR_AND_OBJ entries should not be 7874 // marked as MEMBER_OF. 
7875 ShouldBeMemberOf = false; 7876 } 7877 } 7878 7879 CombinedInfo.Types.push_back(Flags); 7880 } 7881 7882 // If we have encountered a member expression so far, keep track of the 7883 // mapped member. If the parent is "*this", then the value declaration 7884 // is nullptr. 7885 if (EncounteredME) { 7886 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7887 unsigned FieldIndex = FD->getFieldIndex(); 7888 7889 // Update info about the lowest and highest elements for this struct 7890 if (!PartialStruct.Base.isValid()) { 7891 PartialStruct.LowestElem = {FieldIndex, LB}; 7892 if (IsFinalArraySection) { 7893 Address HB = 7894 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7895 .getAddress(CGF); 7896 PartialStruct.HighestElem = {FieldIndex, HB}; 7897 } else { 7898 PartialStruct.HighestElem = {FieldIndex, LB}; 7899 } 7900 PartialStruct.Base = BP; 7901 PartialStruct.LB = BP; 7902 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7903 PartialStruct.LowestElem = {FieldIndex, LB}; 7904 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7905 PartialStruct.HighestElem = {FieldIndex, LB}; 7906 } 7907 } 7908 7909 // Need to emit combined struct for array sections. 7910 if (IsFinalArraySection || IsNonContiguous) 7911 PartialStruct.IsArraySection = true; 7912 7913 // If we have a final array section, we are done with this expression. 7914 if (IsFinalArraySection) 7915 break; 7916 7917 // The pointer becomes the base for the next element. 
7918 if (Next != CE) 7919 BP = LB; 7920 7921 IsExpressionFirstInfo = false; 7922 IsCaptureFirstInfo = false; 7923 FirstPointerInComplexData = false; 7924 } else if (FirstPointerInComplexData) { 7925 QualType Ty = Components.rbegin() 7926 ->getAssociatedDeclaration() 7927 ->getType() 7928 .getNonReferenceType(); 7929 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7930 FirstPointerInComplexData = false; 7931 } 7932 } 7933 // If ran into the whole component - allocate the space for the whole 7934 // record. 7935 if (!EncounteredME) 7936 PartialStruct.HasCompleteRecord = true; 7937 7938 if (!IsNonContiguous) 7939 return; 7940 7941 const ASTContext &Context = CGF.getContext(); 7942 7943 // For supporting stride in array section, we need to initialize the first 7944 // dimension size as 1, first offset as 0, and first count as 1 7945 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7946 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7947 MapValuesArrayTy CurStrides; 7948 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7949 uint64_t ElementTypeSize; 7950 7951 // Collect Size information for each dimension and get the element size as 7952 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 7953 // should be [10, 10] and the first stride is 4 btyes. 7954 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7955 Components) { 7956 const Expr *AssocExpr = Component.getAssociatedExpression(); 7957 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7958 7959 if (!OASE) 7960 continue; 7961 7962 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7963 auto *CAT = Context.getAsConstantArrayType(Ty); 7964 auto *VAT = Context.getAsVariableArrayType(Ty); 7965 7966 // We need all the dimension size except for the last dimension. 
7967 assert((VAT || CAT || &Component == &*Components.begin()) && 7968 "Should be either ConstantArray or VariableArray if not the " 7969 "first Component"); 7970 7971 // Get element size if CurStrides is empty. 7972 if (CurStrides.empty()) { 7973 const Type *ElementType = nullptr; 7974 if (CAT) 7975 ElementType = CAT->getElementType().getTypePtr(); 7976 else if (VAT) 7977 ElementType = VAT->getElementType().getTypePtr(); 7978 else 7979 assert(&Component == &*Components.begin() && 7980 "Only expect pointer (non CAT or VAT) when this is the " 7981 "first Component"); 7982 // If ElementType is null, then it means the base is a pointer 7983 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7984 // for next iteration. 7985 if (ElementType) { 7986 // For the case that having pointer as base, we need to remove one 7987 // level of indirection. 7988 if (&Component != &*Components.begin()) 7989 ElementType = ElementType->getPointeeOrArrayElementType(); 7990 ElementTypeSize = 7991 Context.getTypeSizeInChars(ElementType).getQuantity(); 7992 CurStrides.push_back( 7993 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7994 } 7995 } 7996 // Get dimension value except for the last dimension since we don't need 7997 // it. 7998 if (DimSizes.size() < Components.size() - 1) { 7999 if (CAT) 8000 DimSizes.push_back(llvm::ConstantInt::get( 8001 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8002 else if (VAT) 8003 DimSizes.push_back(CGF.Builder.CreateIntCast( 8004 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8005 /*IsSigned=*/false)); 8006 } 8007 } 8008 8009 // Skip the dummy dimension since we have already have its information. 8010 auto DI = DimSizes.begin() + 1; 8011 // Product of dimension. 8012 llvm::Value *DimProd = 8013 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8014 8015 // Collect info for non-contiguous. 
Notice that offset, count, and stride 8016 // are only meaningful for array-section, so we insert a null for anything 8017 // other than array-section. 8018 // Also, the size of offset, count, and stride are not the same as 8019 // pointers, base_pointers, sizes, or dims. Instead, the size of offset, 8020 // count, and stride are the same as the number of non-contiguous 8021 // declaration in target update to/from clause. 8022 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8023 Components) { 8024 const Expr *AssocExpr = Component.getAssociatedExpression(); 8025 8026 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8027 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8028 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8029 /*isSigned=*/false); 8030 CurOffsets.push_back(Offset); 8031 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8032 CurStrides.push_back(CurStrides.back()); 8033 continue; 8034 } 8035 8036 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8037 8038 if (!OASE) 8039 continue; 8040 8041 // Offset 8042 const Expr *OffsetExpr = OASE->getLowerBound(); 8043 llvm::Value *Offset = nullptr; 8044 if (!OffsetExpr) { 8045 // If offset is absent, then we just set it to zero. 8046 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8047 } else { 8048 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8049 CGF.Int64Ty, 8050 /*isSigned=*/false); 8051 } 8052 CurOffsets.push_back(Offset); 8053 8054 // Count 8055 const Expr *CountExpr = OASE->getLength(); 8056 llvm::Value *Count = nullptr; 8057 if (!CountExpr) { 8058 // In Clang, once a high dimension is an array section, we construct all 8059 // the lower dimension as array section, however, for case like 8060 // arr[0:2][2], Clang construct the inner dimension as an array section 8061 // but it actually is not in an array section form according to spec. 
8062 if (!OASE->getColonLocFirst().isValid() && 8063 !OASE->getColonLocSecond().isValid()) { 8064 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8065 } else { 8066 // OpenMP 5.0, 2.1.5 Array Sections, Description. 8067 // When the length is absent it defaults to ⌈(size − 8068 // lower-bound)/stride⌉, where size is the size of the array 8069 // dimension. 8070 const Expr *StrideExpr = OASE->getStride(); 8071 llvm::Value *Stride = 8072 StrideExpr 8073 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8074 CGF.Int64Ty, /*isSigned=*/false) 8075 : nullptr; 8076 if (Stride) 8077 Count = CGF.Builder.CreateUDiv( 8078 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8079 else 8080 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8081 } 8082 } else { 8083 Count = CGF.EmitScalarExpr(CountExpr); 8084 } 8085 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8086 CurCounts.push_back(Count); 8087 8088 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8089 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8090 // Offset Count Stride 8091 // D0 0 1 4 (int) <- dummy dimension 8092 // D1 0 2 8 (2 * (1) * 4) 8093 // D2 1 2 20 (1 * (1 * 5) * 4) 8094 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8095 const Expr *StrideExpr = OASE->getStride(); 8096 llvm::Value *Stride = 8097 StrideExpr 8098 ? 
                CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
                : nullptr;
      // Fold the size of this dimension into the running product so the
      // emitted stride is expressed in bytes from the array base.
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      // Advance to the next dimension's size, guarding against running off
      // the end for the innermost dimension.
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // Constant firstprivate variables captured by reference are mapped
      // 'always, to'.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // Pointer-typed firstprivates map the pointee as well (PTR_AND_OBJ).
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not firstprivate: default tofrom mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag encoding the 1-based \p Position of the parent
  /// entry within the argument arrays.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with \p MemberOfFlag,
  /// leaving entries that were never marked with the placeholder untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the layout of \p RD into \p Layout in LLVM field order:
  /// recurses into non-empty (virtual and non-virtual) bases and collects the
  /// FieldDecls; bitfields and zero-size fields are skipped. \p AsBase selects
  /// the base-subobject LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Sparse map from LLVM field index to the base class or field occupying
    // that slot; null slots (padding, bitfield storage) are skipped below.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Do not overwrite a slot already claimed by a non-virtual base.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in layout order, recursing into bases.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Buckets per declaration, emitted in this order: entries with the
    // 'present' modifier first, then 'alloc' entries, then everything else.
    // Total is only used as the bucket count below.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Classify 'map' clause entries into the Present/Allocs/Other buckets.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The expression is not valid for implicit maps (no map location).
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // 'to' clauses behave as map-to for the purposes of this table.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // 'from' clauses behave as map-from.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected info, one declaration at a time, bucket by bucket
    // (Present entries first, then Allocs, then Other).
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for executable directives: records firstprivate and
  /// is_device_ptr information needed when generating map entries.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Either the allocator traits variable or the allocator itself (when
        // it is a plain variable) becomes an implicit firstprivate.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither MEMBER_OF something nor an array section
    // needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a completely mapped record, the range is the record itself, so both
    // bounds start from the record base (LB).
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    // Delegate to the clause-driven generator using this directive's clauses.
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    // Delegate to the clause-driven generator using the mapper's clauses.
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects are handled here; anything else is ignored.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' is mapped as a pointer-sized PTR_AND_OBJ entry.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: intentionally shadows the parameter VD for the rest of the loop
      // body; only by-reference captures and pointer-typed captures are mapped.
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the pointer value itself, size zero.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
8762 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8763 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8764 continue; 8765 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8766 assert(BasePtr && "Unable to find base lambda address."); 8767 int TgtIdx = -1; 8768 for (unsigned J = I; J > 0; --J) { 8769 unsigned Idx = J - 1; 8770 if (Pointers[Idx] != BasePtr) 8771 continue; 8772 TgtIdx = Idx; 8773 break; 8774 } 8775 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8776 // All other current entries will be MEMBER_OF the combined entry 8777 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8778 // 0xFFFF in the MEMBER_OF field). 8779 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8780 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8781 } 8782 } 8783 8784 /// Generate the base pointers, section pointers, sizes, map types, and 8785 /// mappers associated to a given capture (all included in \a CombinedInfo). 8786 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8787 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8788 StructRangeInfoTy &PartialStruct) const { 8789 assert(!Cap->capturesVariableArrayType() && 8790 "Not expecting to generate map info for a variable array type!"); 8791 8792 // We need to know when we generating information for the first component 8793 const ValueDecl *VD = Cap->capturesThis() 8794 ? nullptr 8795 : Cap->getCapturedVar()->getCanonicalDecl(); 8796 8797 // If this declaration appears in a is_device_ptr clause we just have to 8798 // pass the pointer by value. If it is a reference to a declaration, we just 8799 // pass its value. 
8800 if (DevPointersMap.count(VD)) { 8801 CombinedInfo.Exprs.push_back(VD); 8802 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8803 CombinedInfo.Pointers.push_back(Arg); 8804 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8805 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8806 /*isSigned=*/true)); 8807 CombinedInfo.Types.push_back( 8808 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 8809 OMP_MAP_TARGET_PARAM); 8810 CombinedInfo.Mappers.push_back(nullptr); 8811 return; 8812 } 8813 8814 using MapData = 8815 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8816 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8817 const ValueDecl *, const Expr *>; 8818 SmallVector<MapData, 4> DeclComponentLists; 8819 assert(CurDir.is<const OMPExecutableDirective *>() && 8820 "Expect a executable directive"); 8821 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8822 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8823 const auto *EI = C->getVarRefs().begin(); 8824 for (const auto L : C->decl_component_lists(VD)) { 8825 const ValueDecl *VDecl, *Mapper; 8826 // The Expression is not correct if the mapping is implicit 8827 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8828 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8829 std::tie(VDecl, Components, Mapper) = L; 8830 assert(VDecl == VD && "We got information for the wrong declaration??"); 8831 assert(!Components.empty() && 8832 "Not expecting declaration with no component lists."); 8833 DeclComponentLists.emplace_back(Components, C->getMapType(), 8834 C->getMapTypeModifiers(), 8835 C->isImplicit(), Mapper, E); 8836 ++EI; 8837 } 8838 } 8839 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 8840 const MapData &RHS) { 8841 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 8842 OpenMPMapClauseKind MapType = std::get<1>(RHS); 8843 bool HasPresent = !MapModifiers.empty() && 8844 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { 8845 return K == clang::OMPC_MAP_MODIFIER_present; 8846 }); 8847 bool HasAllocs = MapType == OMPC_MAP_alloc; 8848 MapModifiers = std::get<2>(RHS); 8849 MapType = std::get<1>(LHS); 8850 bool HasPresentR = 8851 !MapModifiers.empty() && 8852 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { 8853 return K == clang::OMPC_MAP_MODIFIER_present; 8854 }); 8855 bool HasAllocsR = MapType == OMPC_MAP_alloc; 8856 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 8857 }); 8858 8859 // Find overlapping elements (including the offset from the base element). 
    // Map each component list to the set of other component lists that
    // overlap it (share a common prefix of components).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Only compare against lists that follow L to visit each pair once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the base outwards (reverse order)
        // until the components diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The shorter list is the base; the longer one overlaps it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. The record layout is needed
    // to order fields that belong to different parent records.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Compare the two lists component-by-component from the base.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: within the same record by
            // field index, across records by the plain layout order.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped by dereferencing the captured pointer: size and map
      // flags refer to the pointee object.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      // Constant firstprivate variables are promoted to a global copy so the
      // device can read them without a per-region transfer.
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: map what the pointer points to, so load it
          // through the captured reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the runtime descriptors used for non-contiguous target updates.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  // Build an implicit record type mirroring descriptor_dim above.
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty); // offset
  addFieldToRecordDecl(C, RD, Int64Ty); // count
  addFieldToRecordDecl(C, RD, Int64Ty); // stride
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored innermost-first in NonContigInfo, so write them
      // out in reverse.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9162 } 9163 // args[I] = &dims 9164 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9165 DimsAddr, CGM.Int8PtrTy); 9166 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9167 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9168 Info.PointersArray, 0, I); 9169 Address PAddr(P, CGF.getPointerAlign()); 9170 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9171 ++L; 9172 } 9173 } 9174 9175 /// Emit a string constant containing the names of the values mapped to the 9176 /// offloading runtime library. 9177 llvm::Constant * 9178 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9179 MappableExprsHandler::MappingExprInfo &MapExprs) { 9180 llvm::Constant *SrcLocStr; 9181 if (!MapExprs.getMapDecl()) { 9182 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9183 } else { 9184 std::string ExprName = ""; 9185 if (MapExprs.getMapExpr()) { 9186 PrintingPolicy P(CGF.getContext().getLangOpts()); 9187 llvm::raw_string_ostream OS(ExprName); 9188 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9189 OS.flush(); 9190 } else { 9191 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9192 } 9193 9194 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9195 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9196 const char *FileName = PLoc.getFilename(); 9197 unsigned Line = PLoc.getLine(); 9198 unsigned Column = PLoc.getColumn(); 9199 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9200 Line, Column); 9201 } 9202 9203 return SrcLocStr; 9204 } 9205 9206 /// Emit the arrays used to pass the captures and map information to the 9207 /// offloading runtime library. If there is no map or capture information, 9208 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries holding the per-entry base pointers, pointers and
    // mapper functions; they are filled in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the number of
        // dimensions instead of a byte count.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size and mapper arrays,
    // one store per entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where device-pointer captures were stored so 'use_device_ptr'
      // style clauses can look them up later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array in Info to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the region-end call, use the 'present'-stripped map type array when
    // one was generated.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No entries: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
9471 static const OMPExecutableDirective * 9472 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9473 const auto *CS = D.getInnermostCapturedStmt(); 9474 const auto *Body = 9475 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9476 const Stmt *ChildStmt = 9477 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9478 9479 if (const auto *NestedDir = 9480 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9481 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9482 switch (D.getDirectiveKind()) { 9483 case OMPD_target: 9484 if (isOpenMPDistributeDirective(DKind)) 9485 return NestedDir; 9486 if (DKind == OMPD_teams) { 9487 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9488 /*IgnoreCaptured=*/true); 9489 if (!Body) 9490 return nullptr; 9491 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9492 if (const auto *NND = 9493 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9494 DKind = NND->getDirectiveKind(); 9495 if (isOpenMPDistributeDirective(DKind)) 9496 return NND; 9497 } 9498 } 9499 return nullptr; 9500 case OMPD_target_teams: 9501 if (isOpenMPDistributeDirective(DKind)) 9502 return NestedDir; 9503 return nullptr; 9504 case OMPD_target_parallel: 9505 case OMPD_target_simd: 9506 case OMPD_target_parallel_for: 9507 case OMPD_target_parallel_for_simd: 9508 return nullptr; 9509 case OMPD_target_teams_distribute: 9510 case OMPD_target_teams_distribute_simd: 9511 case OMPD_target_teams_distribute_parallel_for: 9512 case OMPD_target_teams_distribute_parallel_for_simd: 9513 case OMPD_parallel: 9514 case OMPD_for: 9515 case OMPD_parallel_for: 9516 case OMPD_parallel_master: 9517 case OMPD_parallel_sections: 9518 case OMPD_for_simd: 9519 case OMPD_parallel_for_simd: 9520 case OMPD_cancel: 9521 case OMPD_cancellation_point: 9522 case OMPD_ordered: 9523 case OMPD_threadprivate: 9524 case OMPD_allocate: 9525 case OMPD_task: 9526 case OMPD_simd: 9527 case OMPD_tile: 9528 
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for. If the
///        mapper was already emitted (present in UDMMap) this is a no-op.
/// \param CGF Optional. When non-null, the emitted mapper is additionally
///        recorded in FunctionUDMMap under \p CGF's current function.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The six parameters
  // mirror the runtime signature shown in the \code example above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Build a unique mapper name from the mangled mapped type and the mapper's
  // declared identifier.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when the translation unit is
  // built at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements. The division
  // is expected to be exact since Size is a multiple of the element size.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a loop that iterates over all Size elements and maps each of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the MEMBER_OF field position so it can be
  // folded into each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // ToElseBB falls through to EndBB when the map type is tofrom, in which
    // case MemberMapType is used unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is present when more than one element is mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Initialization guard: (Size > 1 || (base != begin && PTR_AND_OBJ)) and
    // the DELETE bit is NOT set.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion guard: Size > 1 and the DELETE bit IS set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM |
                                   MappableExprsHandler::OMP_MAP_MEMBER_OF)));
  // No mapping name is attached for the whole-section entry.
  llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapNameArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function emitted for \p D, emitting it on demand. The
/// result is memoized in UDMMap, so repeated queries for the same declaration
/// return the same llvm::Function.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call that pushes the loop trip count of the (possibly nested)
/// teams distribute loop to the runtime before the target region is launched.
/// \p SizeEmitter computes the number of iterations for the detected loop
/// directive; nothing is emitted when no suitable directive is found.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the offloading call sequence for a target region: fill the offloading
/// argument arrays, call __tgt_target*_mapper (choosing the teams and/or
/// nowait variants based on \p D's clauses), and fall back to calling
/// \p OutlinedFn on the host when offloading fails or is unavailable.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the target call in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and the two array pointers are filled in later by TargetThenGen
  // (below) before ThenGen runs, which is why they are captured by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured record fields, and the generated
    // capture values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the computed arrays to the outer-scope locals ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit the corresponding device functions for the ones that are registered
/// as offload entry points. \p ParentName is the mangled name of the host
/// function enclosing the statement, used to identify the entry.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive,
    // so reaching them here is a programming error.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Decide whether the normal code generation for \p GD should be skipped.
/// Returns true when the declaration must NOT be emitted in the current
/// (host or device) compilation; on the device side this also triggers a
/// scan for nested target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
10444 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10445 AlreadyEmittedTargetDecls.count(VD) == 0; 10446 } 10447 10448 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10449 if (!CGM.getLangOpts().OpenMPIsDevice) 10450 return false; 10451 10452 // Check if there are Ctors/Dtors in this declaration and look for target 10453 // regions in it. We use the complete variant to produce the kernel name 10454 // mangling. 10455 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10456 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10457 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10458 StringRef ParentName = 10459 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10460 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10461 } 10462 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10463 StringRef ParentName = 10464 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10465 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10466 } 10467 } 10468 10469 // Do not to emit variable if it is not marked as declare target. 
10470 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10471 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10472 cast<VarDecl>(GD.getDecl())); 10473 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10474 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10475 HasRequiresUnifiedSharedMemory)) { 10476 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10477 return true; 10478 } 10479 return false; 10480 } 10481 10482 llvm::Constant * 10483 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10484 const VarDecl *VD) { 10485 assert(VD->getType().isConstant(CGM.getContext()) && 10486 "Expected constant variable."); 10487 StringRef VarName; 10488 llvm::Constant *Addr; 10489 llvm::GlobalValue::LinkageTypes Linkage; 10490 QualType Ty = VD->getType(); 10491 SmallString<128> Buffer; 10492 { 10493 unsigned DeviceID; 10494 unsigned FileID; 10495 unsigned Line; 10496 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10497 FileID, Line); 10498 llvm::raw_svector_ostream OS(Buffer); 10499 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10500 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10501 VarName = OS.str(); 10502 } 10503 Linkage = llvm::GlobalValue::InternalLinkage; 10504 Addr = 10505 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10506 getDefaultFirstprivateAddressSpace()); 10507 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10508 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10509 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10510 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10511 VarName, Addr, VarSize, 10512 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10513 return Addr; 10514 } 10515 10516 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10517 llvm::Constant *Addr) { 10518 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10519 
!CGM.getLangOpts().OpenMPIsDevice) 10520 return; 10521 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10523 if (!Res) { 10524 if (CGM.getLangOpts().OpenMPIsDevice) { 10525 // Register non-target variables being emitted in device code (debug info 10526 // may cause this). 10527 StringRef VarName = CGM.getMangledName(VD); 10528 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10529 } 10530 return; 10531 } 10532 // Register declare target variables. 10533 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10534 StringRef VarName; 10535 CharUnits VarSize; 10536 llvm::GlobalValue::LinkageTypes Linkage; 10537 10538 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10539 !HasRequiresUnifiedSharedMemory) { 10540 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10541 VarName = CGM.getMangledName(VD); 10542 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10543 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10544 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10545 } else { 10546 VarSize = CharUnits::Zero(); 10547 } 10548 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10549 // Temp solution to prevent optimizations of the internal variables. 
10550 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10551 std::string RefName = getName({VarName, "ref"}); 10552 if (!CGM.GetGlobalValue(RefName)) { 10553 llvm::Constant *AddrRef = 10554 getOrCreateInternalVariable(Addr->getType(), RefName); 10555 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10556 GVAddrRef->setConstant(/*Val=*/true); 10557 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10558 GVAddrRef->setInitializer(Addr); 10559 CGM.addCompilerUsedGlobal(GVAddrRef); 10560 } 10561 } 10562 } else { 10563 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10564 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10565 HasRequiresUnifiedSharedMemory)) && 10566 "Declare target attribute must link or to with unified memory."); 10567 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10568 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10569 else 10570 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10571 10572 if (CGM.getLangOpts().OpenMPIsDevice) { 10573 VarName = Addr->getName(); 10574 Addr = nullptr; 10575 } else { 10576 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10577 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10578 } 10579 VarSize = CGM.getPointerSize(); 10580 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10581 } 10582 10583 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10584 VarName, Addr, VarSize, Flags, Linkage); 10585 } 10586 10587 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10588 if (isa<FunctionDecl>(GD.getDecl()) || 10589 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10590 return emitTargetFunctions(GD); 10591 10592 return emitTargetGlobalVariable(GD); 10593 } 10594 10595 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10596 for (const VarDecl *VD : DeferredGlobalVariables) { 10597 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10598 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10599 if (!Res) 
10600 continue; 10601 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10602 !HasRequiresUnifiedSharedMemory) { 10603 CGM.EmitGlobal(VD); 10604 } else { 10605 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10606 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10607 HasRequiresUnifiedSharedMemory)) && 10608 "Expected link clause or to clause with unified memory."); 10609 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10610 } 10611 } 10612 } 10613 10614 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10615 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10616 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10617 " Expected target-based directive."); 10618 } 10619 10620 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10621 for (const OMPClause *Clause : D->clauselists()) { 10622 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10623 HasRequiresUnifiedSharedMemory = true; 10624 } else if (const auto *AC = 10625 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10626 switch (AC->getAtomicDefaultMemOrderKind()) { 10627 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10628 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10629 break; 10630 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10631 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10632 break; 10633 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10634 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10635 break; 10636 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10637 break; 10638 } 10639 } 10640 } 10641 } 10642 10643 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10644 return RequiresAtomicOrdering; 10645 } 10646 10647 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10648 LangAS &AS) { 10649 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10650 return false; 10651 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10652 
switch(A->getAllocatorType()) { 10653 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10654 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10655 // Not supported, fallback to the default mem space. 10656 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10657 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10658 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10659 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10660 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10661 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10662 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10663 AS = LangAS::Default; 10664 return true; 10665 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10666 llvm_unreachable("Expected predefined allocator for the variables with the " 10667 "static storage."); 10668 } 10669 return false; 10670 } 10671 10672 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10673 return HasRequiresUnifiedSharedMemory; 10674 } 10675 10676 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10677 CodeGenModule &CGM) 10678 : CGM(CGM) { 10679 if (CGM.getLangOpts().OpenMPIsDevice) { 10680 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10681 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10682 } 10683 } 10684 10685 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10686 if (CGM.getLangOpts().OpenMPIsDevice) 10687 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10688 } 10689 10690 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10691 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10692 return true; 10693 10694 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10695 // Do not to emit function if it is marked as declare target as it was already 10696 // emitted. 
10697 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10698 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10699 if (auto *F = dyn_cast_or_null<llvm::Function>( 10700 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10701 return !F->isDeclaration(); 10702 return false; 10703 } 10704 return true; 10705 } 10706 10707 return !AlreadyEmittedTargetDecls.insert(D).second; 10708 } 10709 10710 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10711 // If we don't have entries or if we are emitting code for the device, we 10712 // don't need to do anything. 10713 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10714 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10715 (OffloadEntriesInfoManager.empty() && 10716 !HasEmittedDeclareTargetRegion && 10717 !HasEmittedTargetRegion)) 10718 return nullptr; 10719 10720 // Create and register the function that handles the requires directives. 10721 ASTContext &C = CGM.getContext(); 10722 10723 llvm::Function *RequiresRegFn; 10724 { 10725 CodeGenFunction CGF(CGM); 10726 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10727 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10728 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10729 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10730 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10731 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10732 // TODO: check for other requires clauses. 10733 // The requires directive takes effect only when a target region is 10734 // present in the compilation unit. Otherwise it is ignored and not 10735 // passed to the runtime. This avoids the runtime from throwing an error 10736 // for mismatching requires clauses across compilation units that don't 10737 // contain at least 1 target region. 
10738 assert((HasEmittedTargetRegion || 10739 HasEmittedDeclareTargetRegion || 10740 !OffloadEntriesInfoManager.empty()) && 10741 "Target or declare target region expected."); 10742 if (HasRequiresUnifiedSharedMemory) 10743 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10744 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10745 CGM.getModule(), OMPRTL___tgt_register_requires), 10746 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10747 CGF.FinishFunction(); 10748 } 10749 return RequiresRegFn; 10750 } 10751 10752 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10753 const OMPExecutableDirective &D, 10754 SourceLocation Loc, 10755 llvm::Function *OutlinedFn, 10756 ArrayRef<llvm::Value *> CapturedVars) { 10757 if (!CGF.HaveInsertPoint()) 10758 return; 10759 10760 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10761 CodeGenFunction::RunCleanupsScope Scope(CGF); 10762 10763 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10764 llvm::Value *Args[] = { 10765 RTLoc, 10766 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10767 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10768 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10769 RealArgs.append(std::begin(Args), std::end(Args)); 10770 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10771 10772 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10773 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10774 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10775 } 10776 10777 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10778 const Expr *NumTeams, 10779 const Expr *ThreadLimit, 10780 SourceLocation Loc) { 10781 if (!CGF.HaveInsertPoint()) 10782 return; 10783 10784 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10785 10786 llvm::Value *NumTeamsVal = 10787 NumTeams 10788 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10789 CGF.CGM.Int32Ty, /* isSigned = */ true) 10790 : CGF.Builder.getInt32(0); 10791 10792 llvm::Value *ThreadLimitVal = 10793 ThreadLimit 10794 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10795 CGF.CGM.Int32Ty, /* isSigned = */ true) 10796 : CGF.Builder.getInt32(0); 10797 10798 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10799 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10800 ThreadLimitVal}; 10801 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10802 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10803 PushNumTeamsArgs); 10804 } 10805 10806 void CGOpenMPRuntime::emitTargetDataCalls( 10807 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10808 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10809 if (!CGF.HaveInsertPoint()) 10810 return; 10811 10812 // Action used to replace the default codegen action and turn privatization 10813 // off. 10814 PrePostActionTy NoPrivAction; 10815 10816 // Generate the code for the opening of the data environment. Capture all the 10817 // arguments of the runtime call by reference because they are used in the 10818 // closing of the region. 10819 auto &&BeginThenGen = [this, &D, Device, &Info, 10820 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10821 // Fill up the arrays with all the mapped variables. 10822 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10823 10824 // Get map clause information. 10825 MappableExprsHandler MEHandler(D, CGF); 10826 MEHandler.generateAllInfo(CombinedInfo); 10827 10828 // Fill up the arrays and create the arguments. 
10829 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10830 /*IsNonContiguous=*/true); 10831 10832 llvm::Value *BasePointersArrayArg = nullptr; 10833 llvm::Value *PointersArrayArg = nullptr; 10834 llvm::Value *SizesArrayArg = nullptr; 10835 llvm::Value *MapTypesArrayArg = nullptr; 10836 llvm::Value *MapNamesArrayArg = nullptr; 10837 llvm::Value *MappersArrayArg = nullptr; 10838 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10839 SizesArrayArg, MapTypesArrayArg, 10840 MapNamesArrayArg, MappersArrayArg, Info); 10841 10842 // Emit device ID if any. 10843 llvm::Value *DeviceID = nullptr; 10844 if (Device) { 10845 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10846 CGF.Int64Ty, /*isSigned=*/true); 10847 } else { 10848 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10849 } 10850 10851 // Emit the number of elements in the offloading arrays. 10852 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10853 // 10854 // Source location for the ident struct 10855 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10856 10857 llvm::Value *OffloadingArgs[] = {RTLoc, 10858 DeviceID, 10859 PointerNum, 10860 BasePointersArrayArg, 10861 PointersArrayArg, 10862 SizesArrayArg, 10863 MapTypesArrayArg, 10864 MapNamesArrayArg, 10865 MappersArrayArg}; 10866 CGF.EmitRuntimeCall( 10867 OMPBuilder.getOrCreateRuntimeFunction( 10868 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10869 OffloadingArgs); 10870 10871 // If device pointer privatization is required, emit the body of the region 10872 // here. It will have to be duplicated: with and without privatization. 10873 if (!Info.CaptureDeviceAddrMap.empty()) 10874 CodeGen(CGF); 10875 }; 10876 10877 // Generate code for the closing of the data region. 
10878 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10879 PrePostActionTy &) { 10880 assert(Info.isValid() && "Invalid data environment closing arguments."); 10881 10882 llvm::Value *BasePointersArrayArg = nullptr; 10883 llvm::Value *PointersArrayArg = nullptr; 10884 llvm::Value *SizesArrayArg = nullptr; 10885 llvm::Value *MapTypesArrayArg = nullptr; 10886 llvm::Value *MapNamesArrayArg = nullptr; 10887 llvm::Value *MappersArrayArg = nullptr; 10888 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10889 SizesArrayArg, MapTypesArrayArg, 10890 MapNamesArrayArg, MappersArrayArg, Info, 10891 {/*ForEndCall=*/true}); 10892 10893 // Emit device ID if any. 10894 llvm::Value *DeviceID = nullptr; 10895 if (Device) { 10896 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10897 CGF.Int64Ty, /*isSigned=*/true); 10898 } else { 10899 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10900 } 10901 10902 // Emit the number of elements in the offloading arrays. 10903 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10904 10905 // Source location for the ident struct 10906 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10907 10908 llvm::Value *OffloadingArgs[] = {RTLoc, 10909 DeviceID, 10910 PointerNum, 10911 BasePointersArrayArg, 10912 PointersArrayArg, 10913 SizesArrayArg, 10914 MapTypesArrayArg, 10915 MapNamesArrayArg, 10916 MappersArrayArg}; 10917 CGF.EmitRuntimeCall( 10918 OMPBuilder.getOrCreateRuntimeFunction( 10919 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10920 OffloadingArgs); 10921 }; 10922 10923 // If we need device pointer privatization, we need to emit the body of the 10924 // region with no privatization in the 'else' branch of the conditional. 10925 // Otherwise, we don't have to do anything. 
10926 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10927 PrePostActionTy &) { 10928 if (!Info.CaptureDeviceAddrMap.empty()) { 10929 CodeGen.setAction(NoPrivAction); 10930 CodeGen(CGF); 10931 } 10932 }; 10933 10934 // We don't have to do anything to close the region if the if clause evaluates 10935 // to false. 10936 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10937 10938 if (IfCond) { 10939 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10940 } else { 10941 RegionCodeGenTy RCG(BeginThenGen); 10942 RCG(CGF); 10943 } 10944 10945 // If we don't require privatization of device pointers, we emit the body in 10946 // between the runtime calls. This avoids duplicating the body code. 10947 if (Info.CaptureDeviceAddrMap.empty()) { 10948 CodeGen.setAction(NoPrivAction); 10949 CodeGen(CGF); 10950 } 10951 10952 if (IfCond) { 10953 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10954 } else { 10955 RegionCodeGenTy RCG(EndThenGen); 10956 RCG(CGF); 10957 } 10958 } 10959 10960 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10961 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10962 const Expr *Device) { 10963 if (!CGF.HaveInsertPoint()) 10964 return; 10965 10966 assert((isa<OMPTargetEnterDataDirective>(D) || 10967 isa<OMPTargetExitDataDirective>(D) || 10968 isa<OMPTargetUpdateDirective>(D)) && 10969 "Expecting either target enter, exit data, or update directives."); 10970 10971 CodeGenFunction::OMPTargetDataInfo InputInfo; 10972 llvm::Value *MapTypesArray = nullptr; 10973 llvm::Value *MapNamesArray = nullptr; 10974 // Generate the code for the opening of the data environment. 10975 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10976 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10977 // Emit device ID if any. 
10978 llvm::Value *DeviceID = nullptr; 10979 if (Device) { 10980 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10981 CGF.Int64Ty, /*isSigned=*/true); 10982 } else { 10983 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10984 } 10985 10986 // Emit the number of elements in the offloading arrays. 10987 llvm::Constant *PointerNum = 10988 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10989 10990 // Source location for the ident struct 10991 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10992 10993 llvm::Value *OffloadingArgs[] = {RTLoc, 10994 DeviceID, 10995 PointerNum, 10996 InputInfo.BasePointersArray.getPointer(), 10997 InputInfo.PointersArray.getPointer(), 10998 InputInfo.SizesArray.getPointer(), 10999 MapTypesArray, 11000 MapNamesArray, 11001 InputInfo.MappersArray.getPointer()}; 11002 11003 // Select the right runtime function call for each standalone 11004 // directive. 11005 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11006 RuntimeFunction RTLFn; 11007 switch (D.getDirectiveKind()) { 11008 case OMPD_target_enter_data: 11009 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11010 : OMPRTL___tgt_target_data_begin_mapper; 11011 break; 11012 case OMPD_target_exit_data: 11013 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11014 : OMPRTL___tgt_target_data_end_mapper; 11015 break; 11016 case OMPD_target_update: 11017 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11018 : OMPRTL___tgt_target_data_update_mapper; 11019 break; 11020 case OMPD_parallel: 11021 case OMPD_for: 11022 case OMPD_parallel_for: 11023 case OMPD_parallel_master: 11024 case OMPD_parallel_sections: 11025 case OMPD_for_simd: 11026 case OMPD_parallel_for_simd: 11027 case OMPD_cancel: 11028 case OMPD_cancellation_point: 11029 case OMPD_ordered: 11030 case OMPD_threadprivate: 11031 case OMPD_allocate: 11032 case OMPD_task: 11033 case OMPD_simd: 11034 case OMPD_tile: 11035 case OMPD_sections: 11036 case OMPD_section: 11037 case OMPD_single: 11038 case OMPD_master: 11039 case OMPD_critical: 11040 case OMPD_taskyield: 11041 case OMPD_barrier: 11042 case OMPD_taskwait: 11043 case OMPD_taskgroup: 11044 case OMPD_atomic: 11045 case OMPD_flush: 11046 case OMPD_depobj: 11047 case OMPD_scan: 11048 case OMPD_teams: 11049 case OMPD_target_data: 11050 case OMPD_distribute: 11051 case OMPD_distribute_simd: 11052 case OMPD_distribute_parallel_for: 11053 case OMPD_distribute_parallel_for_simd: 11054 case OMPD_teams_distribute: 11055 case OMPD_teams_distribute_simd: 11056 case OMPD_teams_distribute_parallel_for: 11057 case OMPD_teams_distribute_parallel_for_simd: 11058 case OMPD_declare_simd: 11059 case OMPD_declare_variant: 11060 case OMPD_begin_declare_variant: 11061 case OMPD_end_declare_variant: 11062 case OMPD_declare_target: 11063 case OMPD_end_declare_target: 11064 case OMPD_declare_reduction: 11065 case OMPD_declare_mapper: 11066 case OMPD_taskloop: 11067 case OMPD_taskloop_simd: 11068 case OMPD_master_taskloop: 11069 case OMPD_master_taskloop_simd: 11070 case OMPD_parallel_master_taskloop: 11071 case OMPD_parallel_master_taskloop_simd: 11072 case OMPD_target: 11073 case OMPD_target_simd: 11074 case OMPD_target_teams_distribute: 11075 case OMPD_target_teams_distribute_simd: 11076 case OMPD_target_teams_distribute_parallel_for: 11077 case OMPD_target_teams_distribute_parallel_for_simd: 11078 case OMPD_target_teams: 11079 
case OMPD_target_parallel: 11080 case OMPD_target_parallel_for: 11081 case OMPD_target_parallel_for_simd: 11082 case OMPD_requires: 11083 case OMPD_unknown: 11084 default: 11085 llvm_unreachable("Unexpected standalone target data directive."); 11086 break; 11087 } 11088 CGF.EmitRuntimeCall( 11089 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11090 OffloadingArgs); 11091 }; 11092 11093 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11094 &MapNamesArray](CodeGenFunction &CGF, 11095 PrePostActionTy &) { 11096 // Fill up the arrays with all the mapped variables. 11097 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11098 11099 // Get map clause information. 11100 MappableExprsHandler MEHandler(D, CGF); 11101 MEHandler.generateAllInfo(CombinedInfo); 11102 11103 TargetDataInfo Info; 11104 // Fill up the arrays and create the arguments. 11105 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11106 /*IsNonContiguous=*/true); 11107 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11108 D.hasClausesOfKind<OMPNowaitClause>(); 11109 emitOffloadingArraysArgument( 11110 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11111 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11112 {/*ForEndTask=*/false}); 11113 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11114 InputInfo.BasePointersArray = 11115 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11116 InputInfo.PointersArray = 11117 Address(Info.PointersArray, CGM.getPointerAlign()); 11118 InputInfo.SizesArray = 11119 Address(Info.SizesArray, CGM.getPointerAlign()); 11120 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11121 MapTypesArray = Info.MapTypesArray; 11122 MapNamesArray = Info.MapNamesArray; 11123 if (RequiresOuterTask) 11124 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11125 else 11126 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11127 }; 11128 11129 
  if (IfCond) {
    // An 'if' clause is present: perform the device data operation only when
    // the condition evaluates to true; the else-branch generator is a no-op.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    // No 'if' clause: emit the data operation unconditionally.
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; defaults to Vector (varies per lane).
  ParamKindTy Kind = Vector;
  /// Linear step value, or, for LinearWithVarStride, the position of the
  /// parameter that holds the stride.
  llvm::APSInt StrideOrArg;
  /// Value of the 'aligned' clause (zero when the clause is absent).
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size in bits of the "characteristic data type" (CDT) of \p FD,
/// which determines the vector length of a 'declare simd' variant when no
/// 'simdlen' clause was specified.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    // Case a): the non-void return type is the CDT.
    CDT = RetType;
  } else {
    // Case b): take the type of the first Vector-classified parameter.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      // The implicit 'this' parameter occupies slot 0 of ParamAttrs.
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Case d): no suitable type found, fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Case c): pass-by-value aggregates map to int.
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Add one "_ZGV<isa><mask><vlen><params>_<name>" vector-variant attribute to
/// \p Fn per (mask, ISA) combination, following the x86 vector ABI mangling
/// used for 'declare simd' functions.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    // No branch-state clause: emit both unmasked ('N') and masked ('M')
    // variants.
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No 'simdlen' clause: derive the lane count from the register width
        // and the characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Append one token per parameter describing its classification.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// This are the Functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11286 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11287 QT = QT.getCanonicalType(); 11288 11289 if (QT->isVoidType()) 11290 return false; 11291 11292 if (Kind == ParamKindTy::Uniform) 11293 return false; 11294 11295 if (Kind == ParamKindTy::Linear) 11296 return false; 11297 11298 // TODO: Handle linear references with modifiers 11299 11300 if (Kind == ParamKindTy::LinearWithVarStride) 11301 return false; 11302 11303 return true; 11304 } 11305 11306 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11307 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11308 QT = QT.getCanonicalType(); 11309 unsigned Size = C.getTypeSize(QT); 11310 11311 // Only scalars and complex within 16 bytes wide set PVB to true. 11312 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11313 return false; 11314 11315 if (QT->isFloatingType()) 11316 return true; 11317 11318 if (QT->isIntegerType()) 11319 return true; 11320 11321 if (QT->isPointerType()) 11322 return true; 11323 11324 // TODO: Add support for complex types (section 3.1.2, item 2). 11325 11326 return false; 11327 } 11328 11329 /// Computes the lane size (LS) of a return type or of an input parameter, 11330 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11331 /// TODO: Add support for references, section 3.2.1, item 1. 11332 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11333 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11334 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11335 if (getAArch64PBV(PTy, C)) 11336 return C.getTypeSize(PTy); 11337 } 11338 if (getAArch64PBV(QT, C)) 11339 return C.getTypeSize(QT); 11340 11341 return C.getTypeSize(C.getUIntPtrType()); 11342 } 11343 11344 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11345 // signature of the scalar function, as defined in 3.2.2 of the 11346 // AAVFABI. 
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  // Collect the lane size of the return value (if any) and of every
  // parameter; the NDS/WDS are the min/max over this list.
  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    // A vector return value that is not pass-by-value must additionally be
    // treated as an input parameter of the vector signature.
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  // Returns (NDS, WDS, OutputBecomesInput).
  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      // 'ls<pos>': linear, stride held by the parameter at position <pos>.
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    // Append the alignment token for parameters with an 'aligned' clause.
    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  // A return value that becomes an input adds a trailing 'v' token.
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
11433 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11434 StringRef Prefix, char ISA, 11435 StringRef ParSeq, StringRef MangledName, 11436 bool OutputBecomesInput, 11437 llvm::Function *Fn) { 11438 switch (NDS) { 11439 case 8: 11440 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11441 OutputBecomesInput, Fn); 11442 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11443 OutputBecomesInput, Fn); 11444 break; 11445 case 16: 11446 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11447 OutputBecomesInput, Fn); 11448 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11449 OutputBecomesInput, Fn); 11450 break; 11451 case 32: 11452 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11453 OutputBecomesInput, Fn); 11454 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11455 OutputBecomesInput, Fn); 11456 break; 11457 case 64: 11458 case 128: 11459 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11460 OutputBecomesInput, Fn); 11461 break; 11462 default: 11463 llvm_unreachable("Scalar type is too wide."); 11464 } 11465 } 11466 11467 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11468 static void emitAArch64DeclareSimdFunction( 11469 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11470 ArrayRef<ParamAttrTy> ParamAttrs, 11471 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11472 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11473 11474 // Get basic data for building the vector signature. 11475 const auto Data = getNDSWDS(FD, ParamAttrs); 11476 const unsigned NDS = std::get<0>(Data); 11477 const unsigned WDS = std::get<1>(Data); 11478 const bool OutputBecomesInput = std::get<2>(Data); 11479 11480 // Check the values provided via `simdlen` by the user. 11481 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11482 if (UserVLEN == 1) { 11483 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11484 DiagnosticsEngine::Warning, 11485 "The clause simdlen(1) has no effect when targeting aarch64."); 11486 CGM.getDiags().Report(SLoc, DiagID); 11487 return; 11488 } 11489 11490 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11491 // Advanced SIMD output. 11492 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11493 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11494 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11495 "power of 2 when targeting Advanced SIMD."); 11496 CGM.getDiags().Report(SLoc, DiagID); 11497 return; 11498 } 11499 11500 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11501 // limits. 11502 if (ISA == 's' && UserVLEN != 0) { 11503 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11504 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11505 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11506 "lanes in the architectural constraints " 11507 "for SVE (min is 128-bit, max is " 11508 "2048-bit, by steps of 128-bit)"); 11509 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11510 return; 11511 } 11512 } 11513 11514 // Sort out parameter sequence. 11515 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11516 StringRef Prefix = "_ZGV"; 11517 // Generate simdlen from user input (if any). 11518 if (UserVLEN) { 11519 if (ISA == 's') { 11520 // SVE generates only a masked function. 11521 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11522 OutputBecomesInput, Fn); 11523 } else { 11524 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11525 // Advanced SIMD generates one or two functions, depending on 11526 // the `[not]inbranch` clause. 
11527 switch (State) { 11528 case OMPDeclareSimdDeclAttr::BS_Undefined: 11529 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11530 OutputBecomesInput, Fn); 11531 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11532 OutputBecomesInput, Fn); 11533 break; 11534 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11535 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11536 OutputBecomesInput, Fn); 11537 break; 11538 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11539 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11540 OutputBecomesInput, Fn); 11541 break; 11542 } 11543 } 11544 } else { 11545 // If no user simdlen is provided, follow the AAVFABI rules for 11546 // generating the vector length. 11547 if (ISA == 's') { 11548 // SVE, section 3.4.1, item 1. 11549 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11550 OutputBecomesInput, Fn); 11551 } else { 11552 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11553 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11554 // two vector names depending on the use of the clause 11555 // `[not]inbranch`. 
11556 switch (State) { 11557 case OMPDeclareSimdDeclAttr::BS_Undefined: 11558 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11559 OutputBecomesInput, Fn); 11560 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11561 OutputBecomesInput, Fn); 11562 break; 11563 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11564 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11565 OutputBecomesInput, Fn); 11566 break; 11567 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11568 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11569 OutputBecomesInput, Fn); 11570 break; 11571 } 11572 } 11573 } 11574 } 11575 11576 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11577 llvm::Function *Fn) { 11578 ASTContext &C = CGM.getContext(); 11579 FD = FD->getMostRecentDecl(); 11580 // Map params to their positions in function decl. 11581 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11582 if (isa<CXXMethodDecl>(FD)) 11583 ParamPositions.try_emplace(FD, 0); 11584 unsigned ParamPos = ParamPositions.size(); 11585 for (const ParmVarDecl *P : FD->parameters()) { 11586 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11587 ++ParamPos; 11588 } 11589 while (FD) { 11590 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11591 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11592 // Mark uniform parameters. 11593 for (const Expr *E : Attr->uniforms()) { 11594 E = E->IgnoreParenImpCasts(); 11595 unsigned Pos; 11596 if (isa<CXXThisExpr>(E)) { 11597 Pos = ParamPositions[FD]; 11598 } else { 11599 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11600 ->getCanonicalDecl(); 11601 Pos = ParamPositions[PVD]; 11602 } 11603 ParamAttrs[Pos].Kind = Uniform; 11604 } 11605 // Get alignment info. 
11606 auto NI = Attr->alignments_begin(); 11607 for (const Expr *E : Attr->aligneds()) { 11608 E = E->IgnoreParenImpCasts(); 11609 unsigned Pos; 11610 QualType ParmTy; 11611 if (isa<CXXThisExpr>(E)) { 11612 Pos = ParamPositions[FD]; 11613 ParmTy = E->getType(); 11614 } else { 11615 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11616 ->getCanonicalDecl(); 11617 Pos = ParamPositions[PVD]; 11618 ParmTy = PVD->getType(); 11619 } 11620 ParamAttrs[Pos].Alignment = 11621 (*NI) 11622 ? (*NI)->EvaluateKnownConstInt(C) 11623 : llvm::APSInt::getUnsigned( 11624 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11625 .getQuantity()); 11626 ++NI; 11627 } 11628 // Mark linear parameters. 11629 auto SI = Attr->steps_begin(); 11630 auto MI = Attr->modifiers_begin(); 11631 for (const Expr *E : Attr->linears()) { 11632 E = E->IgnoreParenImpCasts(); 11633 unsigned Pos; 11634 // Rescaling factor needed to compute the linear parameter 11635 // value in the mangled name. 11636 unsigned PtrRescalingFactor = 1; 11637 if (isa<CXXThisExpr>(E)) { 11638 Pos = ParamPositions[FD]; 11639 } else { 11640 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11641 ->getCanonicalDecl(); 11642 Pos = ParamPositions[PVD]; 11643 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11644 PtrRescalingFactor = CGM.getContext() 11645 .getTypeSizeInChars(P->getPointeeType()) 11646 .getQuantity(); 11647 } 11648 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11649 ParamAttr.Kind = Linear; 11650 // Assuming a stride of 1, for `linear` without modifiers. 
11651 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11652 if (*SI) { 11653 Expr::EvalResult Result; 11654 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11655 if (const auto *DRE = 11656 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11657 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11658 ParamAttr.Kind = LinearWithVarStride; 11659 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11660 ParamPositions[StridePVD->getCanonicalDecl()]); 11661 } 11662 } 11663 } else { 11664 ParamAttr.StrideOrArg = Result.Val.getInt(); 11665 } 11666 } 11667 // If we are using a linear clause on a pointer, we need to 11668 // rescale the value of linear_step with the byte size of the 11669 // pointee type. 11670 if (Linear == ParamAttr.Kind) 11671 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11672 ++SI; 11673 ++MI; 11674 } 11675 llvm::APSInt VLENVal; 11676 SourceLocation ExprLoc; 11677 const Expr *VLENExpr = Attr->getSimdlen(); 11678 if (VLENExpr) { 11679 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11680 ExprLoc = VLENExpr->getExprLoc(); 11681 } 11682 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11683 if (CGM.getTriple().isX86()) { 11684 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11685 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11686 unsigned VLEN = VLENVal.getExtValue(); 11687 StringRef MangledName = Fn->getName(); 11688 if (CGM.getTarget().hasFeature("sve")) 11689 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11690 MangledName, 's', 128, Fn, ExprLoc); 11691 if (CGM.getTarget().hasFeature("neon")) 11692 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11693 MangledName, 'n', 128, Fn, ExprLoc); 11694 } 11695 } 11696 FD = FD->getPreviousDecl(); 11697 } 11698 } 11699 11700 namespace { 11701 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emits the stored runtime call (the doacross finalization) on scope exit.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the __kmpc_doacross_init call for loop directive \p D and register a
/// cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record type was built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim entry per loop dimension, zero-initialized (lo stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini for both normal and EH exits of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink)') with the loop counters of clause \p C.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  // Materialize the per-loop counter values as an int64 array argument.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a runtime call to \p Callee at \p Loc, using the nounwind form when
/// the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Record that a 'declare target' function body has been emitted.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Host runtime: native and target parameters coincide.
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the address for local variable \p VD, allocating it through
/// __kmpc_alloc when it carries an 'omp allocate' attribute and taking into
/// account storage duplicated for untied tasks.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Check whether this variable has dedicated storage in the enclosing
  // untied task.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits the matching __kmpc_free call on scope exit.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII that pushes the set of declarations named in 'nontemporal' clauses of
/// \p S (popped again by the destructor).
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
            ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// RAII that registers the untied-task local variable addresses for the
/// current function (popped again by the destructor).
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Remember which stack slot belongs to the current function so lookups can
  // find it later.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  // True if any active 'nontemporal' region mentions this declaration.
  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collect in \p NeedToAddForLPCsAsDisabled the declarations referenced by
/// directive \p S whose lastprivate-conditional analysis must be disabled
/// inside the region (captured vars of target/task regions and vars named in
/// data-sharing clauses that are tracked by an enclosing region).
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // For each collected decl, mark it disabled if the innermost enclosing
  // lastprivate-conditional region tracks it and is not itself disabled.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // OpenMP 5.0 'conditional' lastprivate modifier enables the tracking
      // action; otherwise the RAII is a no-op.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ?
ActionToDo::PushAsLastprivateConditional 12120 : ActionToDo::DoNotPush) { 12121 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12122 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12123 return; 12124 assert(Action == ActionToDo::PushAsLastprivateConditional && 12125 "Expected a push action."); 12126 LastprivateConditionalData &Data = 12127 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12128 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12129 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12130 continue; 12131 12132 for (const Expr *Ref : C->varlists()) { 12133 Data.DeclToUniqueName.insert(std::make_pair( 12134 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12135 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12136 } 12137 } 12138 Data.IVLVal = IVLVal; 12139 Data.Fn = CGF.CurFn; 12140 } 12141 12142 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12143 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12144 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12145 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12146 if (CGM.getLangOpts().OpenMP < 50) 12147 return; 12148 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12149 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12150 if (!NeedToAddForLPCsAsDisabled.empty()) { 12151 Action = ActionToDo::DisableLastprivateConditional; 12152 LastprivateConditionalData &Data = 12153 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12154 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12155 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12156 Data.Fn = CGF.CurFn; 12157 Data.Disabled = true; 12158 } 12159 } 12160 12161 CGOpenMPRuntime::LastprivateConditionalRAII 12162 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12163 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12164 return 
LastprivateConditionalRAII(CGF, S); 12165 } 12166 12167 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12168 if (CGM.getLangOpts().OpenMP < 50) 12169 return; 12170 if (Action == ActionToDo::DisableLastprivateConditional) { 12171 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12172 "Expected list of disabled private vars."); 12173 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12174 } 12175 if (Action == ActionToDo::PushAsLastprivateConditional) { 12176 assert( 12177 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12178 "Expected list of lastprivate conditional vars."); 12179 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12180 } 12181 } 12182 12183 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12184 const VarDecl *VD) { 12185 ASTContext &C = CGM.getContext(); 12186 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12187 if (I == LastprivateConditionalToTypes.end()) 12188 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12189 QualType NewType; 12190 const FieldDecl *VDField; 12191 const FieldDecl *FiredField; 12192 LValue BaseLVal; 12193 auto VI = I->getSecond().find(VD); 12194 if (VI == I->getSecond().end()) { 12195 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12196 RD->startDefinition(); 12197 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12198 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12199 RD->completeDefinition(); 12200 NewType = C.getRecordType(RD); 12201 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12202 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12203 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12204 } else { 12205 NewType = std::get<0>(VI->getSecond()); 12206 VDField = std::get<1>(VI->getSecond()); 12207 FiredField = std::get<2>(VI->getSecond()); 
12208 BaseLVal = std::get<3>(VI->getSecond()); 12209 } 12210 LValue FiredLVal = 12211 CGF.EmitLValueForField(BaseLVal, FiredField); 12212 CGF.EmitStoreOfScalar( 12213 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12214 FiredLVal); 12215 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12216 } 12217 12218 namespace { 12219 /// Checks if the lastprivate conditional variable is referenced in LHS. 12220 class LastprivateConditionalRefChecker final 12221 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12222 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12223 const Expr *FoundE = nullptr; 12224 const Decl *FoundD = nullptr; 12225 StringRef UniqueDeclName; 12226 LValue IVLVal; 12227 llvm::Function *FoundFn = nullptr; 12228 SourceLocation Loc; 12229 12230 public: 12231 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12232 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12233 llvm::reverse(LPM)) { 12234 auto It = D.DeclToUniqueName.find(E->getDecl()); 12235 if (It == D.DeclToUniqueName.end()) 12236 continue; 12237 if (D.Disabled) 12238 return false; 12239 FoundE = E; 12240 FoundD = E->getDecl()->getCanonicalDecl(); 12241 UniqueDeclName = It->second; 12242 IVLVal = D.IVLVal; 12243 FoundFn = D.Fn; 12244 break; 12245 } 12246 return FoundE == E; 12247 } 12248 bool VisitMemberExpr(const MemberExpr *E) { 12249 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12250 return false; 12251 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12252 llvm::reverse(LPM)) { 12253 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12254 if (It == D.DeclToUniqueName.end()) 12255 continue; 12256 if (D.Disabled) 12257 return false; 12258 FoundE = E; 12259 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12260 UniqueDeclName = It->second; 12261 IVLVal = D.IVLVal; 12262 FoundFn = D.Fn; 12263 break; 12264 } 12265 return FoundE == E; 12266 } 12267 bool VisitStmt(const Stmt *S) { 12268 for (const Stmt 
*Child : S->children()) { 12269 if (!Child) 12270 continue; 12271 if (const auto *E = dyn_cast<Expr>(Child)) 12272 if (!E->isGLValue()) 12273 continue; 12274 if (Visit(Child)) 12275 return true; 12276 } 12277 return false; 12278 } 12279 explicit LastprivateConditionalRefChecker( 12280 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12281 : LPM(LPM) {} 12282 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12283 getFoundData() const { 12284 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12285 } 12286 }; 12287 } // namespace 12288 12289 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12290 LValue IVLVal, 12291 StringRef UniqueDeclName, 12292 LValue LVal, 12293 SourceLocation Loc) { 12294 // Last updated loop counter for the lastprivate conditional var. 12295 // int<xx> last_iv = 0; 12296 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12297 llvm::Constant *LastIV = 12298 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12299 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12300 IVLVal.getAlignment().getAsAlign()); 12301 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12302 12303 // Last value of the lastprivate conditional. 12304 // decltype(priv_a) last_a; 12305 llvm::Constant *Last = getOrCreateInternalVariable( 12306 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12307 cast<llvm::GlobalVariable>(Last)->setAlignment( 12308 LVal.getAlignment().getAsAlign()); 12309 LValue LastLVal = 12310 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12311 12312 // Global loop counter. Required to handle inner parallel-for regions. 
12313 // iv 12314 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12315 12316 // #pragma omp critical(a) 12317 // if (last_iv <= iv) { 12318 // last_iv = iv; 12319 // last_a = priv_a; 12320 // } 12321 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12322 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12323 Action.Enter(CGF); 12324 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12325 // (last_iv <= iv) ? Check if the variable is updated and store new 12326 // value in global var. 12327 llvm::Value *CmpRes; 12328 if (IVLVal.getType()->isSignedIntegerType()) { 12329 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12330 } else { 12331 assert(IVLVal.getType()->isUnsignedIntegerType() && 12332 "Loop iteration variable must be integer."); 12333 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12334 } 12335 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12336 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12337 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12338 // { 12339 CGF.EmitBlock(ThenBB); 12340 12341 // last_iv = iv; 12342 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12343 12344 // last_a = priv_a; 12345 switch (CGF.getEvaluationKind(LVal.getType())) { 12346 case TEK_Scalar: { 12347 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12348 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12349 break; 12350 } 12351 case TEK_Complex: { 12352 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12353 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12354 break; 12355 } 12356 case TEK_Aggregate: 12357 llvm_unreachable( 12358 "Aggregates are not supported in lastprivate conditional."); 12359 } 12360 // } 12361 CGF.EmitBranch(ExitBB); 12362 // There is no need to emit line number for unconditional branch. 
12363 (void)ApplyDebugLocation::CreateEmpty(CGF); 12364 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12365 }; 12366 12367 if (CGM.getLangOpts().OpenMPSimd) { 12368 // Do not emit as a critical region as no parallel region could be emitted. 12369 RegionCodeGenTy ThenRCG(CodeGen); 12370 ThenRCG(CGF); 12371 } else { 12372 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12373 } 12374 } 12375 12376 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12377 const Expr *LHS) { 12378 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12379 return; 12380 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12381 if (!Checker.Visit(LHS)) 12382 return; 12383 const Expr *FoundE; 12384 const Decl *FoundD; 12385 StringRef UniqueDeclName; 12386 LValue IVLVal; 12387 llvm::Function *FoundFn; 12388 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12389 Checker.getFoundData(); 12390 if (FoundFn != CGF.CurFn) { 12391 // Special codegen for inner parallel regions. 
12392 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12393 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12394 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12395 "Lastprivate conditional is not found in outer region."); 12396 QualType StructTy = std::get<0>(It->getSecond()); 12397 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12398 LValue PrivLVal = CGF.EmitLValue(FoundE); 12399 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12400 PrivLVal.getAddress(CGF), 12401 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12402 LValue BaseLVal = 12403 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12404 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12405 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12406 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12407 FiredLVal, llvm::AtomicOrdering::Unordered, 12408 /*IsVolatile=*/true, /*isInit=*/false); 12409 return; 12410 } 12411 12412 // Private address of the lastprivate conditional in the current context. 
12413 // priv_a 12414 LValue LVal = CGF.EmitLValue(FoundE); 12415 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12416 FoundE->getExprLoc()); 12417 } 12418 12419 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12420 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12421 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12422 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12423 return; 12424 auto Range = llvm::reverse(LastprivateConditionalStack); 12425 auto It = llvm::find_if( 12426 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12427 if (It == Range.end() || It->Fn != CGF.CurFn) 12428 return; 12429 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12430 assert(LPCI != LastprivateConditionalToTypes.end() && 12431 "Lastprivates must be registered already."); 12432 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12433 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12434 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12435 for (const auto &Pair : It->DeclToUniqueName) { 12436 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12437 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12438 continue; 12439 auto I = LPCI->getSecond().find(Pair.first); 12440 assert(I != LPCI->getSecond().end() && 12441 "Lastprivate must be rehistered already."); 12442 // bool Cmp = priv_a.Fired != 0; 12443 LValue BaseLVal = std::get<3>(I->getSecond()); 12444 LValue FiredLVal = 12445 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12446 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12447 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12448 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12449 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12450 // if (Cmp) { 12451 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12452 CGF.EmitBlock(ThenBB); 
12453 Address Addr = CGF.GetAddrOfLocalVar(VD); 12454 LValue LVal; 12455 if (VD->getType()->isReferenceType()) 12456 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12457 AlignmentSource::Decl); 12458 else 12459 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12460 AlignmentSource::Decl); 12461 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12462 D.getBeginLoc()); 12463 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12464 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12465 // } 12466 } 12467 } 12468 12469 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12470 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12471 SourceLocation Loc) { 12472 if (CGF.getLangOpts().OpenMP < 50) 12473 return; 12474 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12475 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12476 "Unknown lastprivate conditional variable."); 12477 StringRef UniqueName = It->second; 12478 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12479 // The variable was not updated in the region - exit. 
12480 if (!GV) 12481 return; 12482 LValue LPLVal = CGF.MakeAddrLValue( 12483 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12484 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12485 CGF.EmitStoreOfScalar(Res, PrivLVal); 12486 } 12487 12488 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12489 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12490 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12491 llvm_unreachable("Not supported in SIMD-only mode"); 12492 } 12493 12494 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12495 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12496 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12497 llvm_unreachable("Not supported in SIMD-only mode"); 12498 } 12499 12500 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12501 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12502 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12503 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12504 bool Tied, unsigned &NumberOfParts) { 12505 llvm_unreachable("Not supported in SIMD-only mode"); 12506 } 12507 12508 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12509 SourceLocation Loc, 12510 llvm::Function *OutlinedFn, 12511 ArrayRef<llvm::Value *> CapturedVars, 12512 const Expr *IfCond) { 12513 llvm_unreachable("Not supported in SIMD-only mode"); 12514 } 12515 12516 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12517 CodeGenFunction &CGF, StringRef CriticalName, 12518 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12519 const Expr *Hint) { 12520 llvm_unreachable("Not supported in SIMD-only mode"); 12521 } 12522 12523 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12524 const RegionCodeGenTy &MasterOpGen, 12525 SourceLocation Loc) { 12526 llvm_unreachable("Not supported in SIMD-only mode"); 12527 } 12528 12529 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12530 SourceLocation Loc) { 12531 llvm_unreachable("Not supported in SIMD-only mode"); 12532 } 12533 12534 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12535 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12536 SourceLocation Loc) { 12537 llvm_unreachable("Not supported in SIMD-only mode"); 12538 } 12539 12540 void CGOpenMPSIMDRuntime::emitSingleRegion( 12541 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12542 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12543 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12544 ArrayRef<const Expr *> AssignmentOps) { 12545 llvm_unreachable("Not supported in SIMD-only mode"); 12546 } 12547 12548 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12549 const RegionCodeGenTy &OrderedOpGen, 12550 SourceLocation Loc, 12551 bool IsThreads) { 12552 llvm_unreachable("Not supported in SIMD-only mode"); 12553 } 12554 12555 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12556 SourceLocation Loc, 12557 OpenMPDirectiveKind Kind, 12558 bool EmitChecks, 12559 bool ForceSimpleCall) { 12560 llvm_unreachable("Not supported in SIMD-only mode"); 12561 } 12562 12563 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12564 CodeGenFunction &CGF, SourceLocation Loc, 12565 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12566 bool Ordered, const DispatchRTInput &DispatchValues) { 12567 llvm_unreachable("Not supported in SIMD-only mode"); 12568 } 12569 12570 void CGOpenMPSIMDRuntime::emitForStaticInit( 12571 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12572 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12573 llvm_unreachable("Not supported in SIMD-only mode"); 12574 } 12575 12576 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12577 CodeGenFunction &CGF, SourceLocation Loc, 12578 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12579 llvm_unreachable("Not supported in SIMD-only mode"); 12580 } 12581 12582 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12583 SourceLocation Loc, 12584 unsigned IVSize, 12585 bool IVSigned) { 12586 llvm_unreachable("Not supported in SIMD-only mode"); 12587 } 12588 12589 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12590 SourceLocation Loc, 12591 OpenMPDirectiveKind DKind) { 12592 llvm_unreachable("Not supported in SIMD-only mode"); 12593 } 12594 12595 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12596 SourceLocation Loc, 12597 unsigned IVSize, bool IVSigned, 12598 Address IL, Address LB, 12599 Address UB, Address ST) { 12600 llvm_unreachable("Not supported in SIMD-only mode"); 12601 } 12602 12603 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12604 llvm::Value *NumThreads, 12605 SourceLocation Loc) { 12606 llvm_unreachable("Not supported in SIMD-only mode"); 12607 } 12608 12609 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12610 ProcBindKind ProcBind, 12611 SourceLocation Loc) { 12612 llvm_unreachable("Not supported in SIMD-only mode"); 12613 } 12614 12615 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12616 const VarDecl *VD, 12617 Address VDAddr, 12618 SourceLocation Loc) { 12619 llvm_unreachable("Not supported in SIMD-only mode"); 12620 } 12621 12622 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12623 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12624 CodeGenFunction *CGF) { 12625 llvm_unreachable("Not supported in SIMD-only mode"); 12626 } 12627 12628 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12629 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12630 llvm_unreachable("Not supported in SIMD-only mode"); 12631 } 12632 12633 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12634 ArrayRef<const Expr *> 
Vars, 12635 SourceLocation Loc, 12636 llvm::AtomicOrdering AO) { 12637 llvm_unreachable("Not supported in SIMD-only mode"); 12638 } 12639 12640 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12641 const OMPExecutableDirective &D, 12642 llvm::Function *TaskFunction, 12643 QualType SharedsTy, Address Shareds, 12644 const Expr *IfCond, 12645 const OMPTaskDataTy &Data) { 12646 llvm_unreachable("Not supported in SIMD-only mode"); 12647 } 12648 12649 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12650 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12651 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12652 const Expr *IfCond, const OMPTaskDataTy &Data) { 12653 llvm_unreachable("Not supported in SIMD-only mode"); 12654 } 12655 12656 void CGOpenMPSIMDRuntime::emitReduction( 12657 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12658 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12659 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12660 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12661 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12662 ReductionOps, Options); 12663 } 12664 12665 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12666 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12667 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12668 llvm_unreachable("Not supported in SIMD-only mode"); 12669 } 12670 12671 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12672 SourceLocation Loc, 12673 bool IsWorksharingReduction) { 12674 llvm_unreachable("Not supported in SIMD-only mode"); 12675 } 12676 12677 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12678 SourceLocation Loc, 12679 ReductionCodeGen &RCG, 12680 unsigned N) { 12681 llvm_unreachable("Not supported in SIMD-only mode"); 12682 } 12683 
12684 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12685 SourceLocation Loc, 12686 llvm::Value *ReductionsPtr, 12687 LValue SharedLVal) { 12688 llvm_unreachable("Not supported in SIMD-only mode"); 12689 } 12690 12691 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12692 SourceLocation Loc) { 12693 llvm_unreachable("Not supported in SIMD-only mode"); 12694 } 12695 12696 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12697 CodeGenFunction &CGF, SourceLocation Loc, 12698 OpenMPDirectiveKind CancelRegion) { 12699 llvm_unreachable("Not supported in SIMD-only mode"); 12700 } 12701 12702 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12703 SourceLocation Loc, const Expr *IfCond, 12704 OpenMPDirectiveKind CancelRegion) { 12705 llvm_unreachable("Not supported in SIMD-only mode"); 12706 } 12707 12708 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12709 const OMPExecutableDirective &D, StringRef ParentName, 12710 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12711 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12712 llvm_unreachable("Not supported in SIMD-only mode"); 12713 } 12714 12715 void CGOpenMPSIMDRuntime::emitTargetCall( 12716 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12717 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12718 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12719 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12720 const OMPLoopDirective &D)> 12721 SizeEmitter) { 12722 llvm_unreachable("Not supported in SIMD-only mode"); 12723 } 12724 12725 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12726 llvm_unreachable("Not supported in SIMD-only mode"); 12727 } 12728 12729 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12730 llvm_unreachable("Not supported in SIMD-only mode"); 12731 } 12732 12733 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12734 return false; 12735 } 12736 12737 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12738 const OMPExecutableDirective &D, 12739 SourceLocation Loc, 12740 llvm::Function *OutlinedFn, 12741 ArrayRef<llvm::Value *> CapturedVars) { 12742 llvm_unreachable("Not supported in SIMD-only mode"); 12743 } 12744 12745 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12746 const Expr *NumTeams, 12747 const Expr *ThreadLimit, 12748 SourceLocation Loc) { 12749 llvm_unreachable("Not supported in SIMD-only mode"); 12750 } 12751 12752 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12753 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12754 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12755 llvm_unreachable("Not supported in SIMD-only mode"); 12756 } 12757 12758 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12759 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12760 const Expr *Device) { 12761 llvm_unreachable("Not supported in SIMD-only mode"); 12762 } 12763 12764 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12765 const OMPLoopDirective &D, 12766 ArrayRef<Expr *> NumIterations) { 12767 llvm_unreachable("Not supported in SIMD-only mode"); 12768 } 12769 12770 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12771 const OMPDependClause *C) { 12772 llvm_unreachable("Not supported in SIMD-only mode"); 12773 } 12774 12775 const VarDecl * 12776 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12777 const VarDecl *NativeParam) const { 12778 llvm_unreachable("Not supported in SIMD-only mode"); 12779 } 12780 12781 Address 12782 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12783 const VarDecl *NativeParam, 12784 const VarDecl *TargetParam) const { 12785 llvm_unreachable("Not supported in SIMD-only mode"); 12786 } 12787