1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 918 if (needCleanups(N)) { 919 PrivateAddr = CGF.Builder.CreateElementBitCast( 920 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 921 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 922 } 923 } 924 925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 926 LValue BaseLV) { 927 BaseTy = BaseTy.getNonReferenceType(); 928 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 929 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 930 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 931 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 932 } else { 933 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 934 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 935 } 936 BaseTy = BaseTy->getPointeeType(); 937 } 938 return CGF.MakeAddrLValue( 939 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 940 CGF.ConvertTypeForMem(ElTy)), 941 BaseLV.getType(), BaseLV.getBaseInfo(), 942 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 943 } 944 945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 946 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 947 llvm::Value *Addr) { 948 Address Tmp = Address::invalid(); 949 Address TopTmp = Address::invalid(); 950 Address MostTopTmp = Address::invalid(); 951 BaseTy = BaseTy.getNonReferenceType(); 952 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 953 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 954 Tmp = CGF.CreateMemTemp(BaseTy); 955 if (TopTmp.isValid()) 956 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 957 else 958 MostTopTmp = Tmp; 959 TopTmp = Tmp; 960 BaseTy = BaseTy->getPointeeType(); 961 } 962 llvm::Type *Ty = BaseLVType; 963 if (Tmp.isValid()) 964 Ty = Tmp.getElementType(); 965 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 966 if (Tmp.isValid()) { 967 CGF.Builder.CreateStore(Addr, Tmp); 968 return MostTopTmp; 969 } 970 return 
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
1039 CGF.EHStack.pushTerminate(); 1040 CodeGen(CGF); 1041 CGF.EHStack.popTerminate(); 1042 } 1043 1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1045 CodeGenFunction &CGF) { 1046 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1047 getThreadIDVariable()->getType(), 1048 AlignmentSource::Decl); 1049 } 1050 1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1052 QualType FieldTy) { 1053 auto *Field = FieldDecl::Create( 1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1057 Field->setAccess(AS_public); 1058 DC->addDecl(Field); 1059 return Field; 1060 } 1061 1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1063 StringRef Separator) 1064 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1065 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1066 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1067 1068 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1069 OMPBuilder.initialize(); 1070 loadOffloadInfoMetadata(); 1071 } 1072 1073 void CGOpenMPRuntime::clear() { 1074 InternalVars.clear(); 1075 // Clean non-target variable declarations possibly used only in debug info. 
1076 for (const auto &Data : EmittedNonTargetVariables) { 1077 if (!Data.getValue().pointsToAliveValue()) 1078 continue; 1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1080 if (!GV) 1081 continue; 1082 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1083 continue; 1084 GV->eraseFromParent(); 1085 } 1086 } 1087 1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1089 SmallString<128> Buffer; 1090 llvm::raw_svector_ostream OS(Buffer); 1091 StringRef Sep = FirstSeparator; 1092 for (StringRef Part : Parts) { 1093 OS << Sep << Part; 1094 Sep = Separator; 1095 } 1096 return std::string(OS.str()); 1097 } 1098 1099 static llvm::Function * 1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1101 const Expr *CombinerInitializer, const VarDecl *In, 1102 const VarDecl *Out, bool IsCombiner) { 1103 // void .omp_combiner.(Ty *in, Ty *out); 1104 ASTContext &C = CGM.getContext(); 1105 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1106 FunctionArgList Args; 1107 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1110 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1111 Args.push_back(&OmpOutParm); 1112 Args.push_back(&OmpInParm); 1113 const CGFunctionInfo &FnInfo = 1114 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1115 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1116 std::string Name = CGM.getOpenMPRuntime().getName( 1117 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1118 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1119 Name, &CGM.getModule()); 1120 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1121 if (CGM.getLangOpts().Optimize) { 1122 Fn->removeFnAttr(llvm::Attribute::NoInline); 1123 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1124 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1125 } 1126 CodeGenFunction CGF(CGM); 1127 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1128 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1129 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1130 Out->getLocation()); 1131 CodeGenFunction::OMPPrivateScope Scope(CGF); 1132 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1133 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1134 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1135 .getAddress(CGF); 1136 }); 1137 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1138 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1139 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1140 .getAddress(CGF); 1141 }); 1142 (void)Scope.Privatize(); 1143 if (!IsCombiner && Out->hasInit() && 1144 !CGF.isTrivialInitializer(Out->getInit())) { 1145 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1146 Out->getType().getQualifiers(), 1147 /*IsInitializer=*/true); 1148 } 1149 if (CombinerInitializer) 1150 CGF.EmitIgnoredExpr(CombinerInitializer); 1151 Scope.ForceCleanup(); 1152 CGF.FinishFunction(); 1153 return Fn; 1154 } 1155 1156 void CGOpenMPRuntime::emitUserDefinedReduction( 1157 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1158 if (UDRMap.count(D) > 0) 1159 return; 1160 llvm::Function *Combiner = emitCombinerOrInitializer( 1161 CGM, D->getType(), D->getCombiner(), 1162 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1163 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1164 /*IsCombiner=*/true); 1165 llvm::Function *Initializer = nullptr; 1166 if (const Expr *Init = D->getInitializer()) { 1167 Initializer = emitCombinerOrInitializer( 1168 CGM, D->getType(), 1169 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1170 : nullptr, 1171 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1173 /*IsCombiner=*/false); 1174 } 1175 UDRMap.try_emplace(D, Combiner, Initializer); 1176 if (CGF) { 1177 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1178 Decls.second.push_back(D); 1179 } 1180 } 1181 1182 std::pair<llvm::Function *, llvm::Function *> 1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1184 auto I = UDRMap.find(D); 1185 if (I != UDRMap.end()) 1186 return I->second; 1187 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1188 return UDRMap.lookup(D); 1189 } 1190 1191 namespace { 1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1193 // Builder if one is present. 1194 struct PushAndPopStackRAII { 1195 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1196 bool HasCancel) 1197 : OMPBuilder(OMPBuilder) { 1198 if (!OMPBuilder) 1199 return; 1200 1201 // The following callback is the crucial part of clangs cleanup process. 1202 // 1203 // NOTE: 1204 // Once the OpenMPIRBuilder is used to create parallel regions (and 1205 // similar), the cancellation destination (Dest below) is determined via 1206 // IP. That means if we have variables to finalize we split the block at IP, 1207 // use the new block (=BB) as destination to build a JumpDest (via 1208 // getJumpDestInCurrentScope(BB)) which then is fed to 1209 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1210 // to push & pop an FinalizationInfo object. 
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel(); 1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1256 HasCancel = OPFD->hasCancel(); 1257 else if (const auto *OPFD = 1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else if (const auto *OPFD = 1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1265 // parallel region to make cancellation barriers work properly. 1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1269 HasCancel, OutlinedHelperName); 1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1272 } 1273 1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1275 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1276 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1278 return emitParallelOrTeamsOutlinedFunction( 1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1280 } 1281 1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1283 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1285 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1286 return emitParallelOrTeamsOutlinedFunction( 1287 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1293 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1294 bool Tied, unsigned &NumberOfParts) { 1295 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1296 PrePostActionTy &) { 1297 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1298 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1299 llvm::Value *TaskArgs[] = { 1300 UpLoc, ThreadID, 1301 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1302 TaskTVar->getType()->castAs<PointerType>()) 1303 .getPointer(CGF)}; 1304 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1305 CGM.getModule(), OMPRTL___kmpc_omp_task), 1306 TaskArgs); 1307 }; 1308 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1309 UntiedCodeGen); 1310 CodeGen.setAction(Action); 1311 assert(!ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 for tasks"); 1313 const OpenMPDirectiveKind Region = 1314 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1315 : OMPD_task; 1316 const CapturedStmt *CS = D.getCapturedStmt(Region); 1317 bool HasCancel = false; 1318 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1319 HasCancel = TD->hasCancel(); 1320 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1321 HasCancel = TD->hasCancel(); 1322 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1323 HasCancel = TD->hasCancel(); 1324 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 1327 CodeGenFunction CGF(CGM, true); 1328 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1329 InnermostKind, HasCancel, Action); 1330 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1331 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1332 if (!Tied) 1333 NumberOfParts = Action.getNumberOfParts(); 1334 return Res; 1335 } 1336 1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1338 
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1436 Line, Column); 1437 } 1438 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1439 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1440 Reserved2Flags); 1441 } 1442 1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1444 SourceLocation Loc) { 1445 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1446 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1447 // the clang invariants used below might be broken. 1448 if (CGM.getLangOpts().OpenMPIRBuilder) { 1449 SmallString<128> Buffer; 1450 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1451 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1452 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1453 return OMPBuilder.getOrCreateThreadID( 1454 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1455 } 1456 1457 llvm::Value *ThreadID = nullptr; 1458 // Check whether we've already cached a load of the thread id in this 1459 // function. 1460 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1461 if (I != OpenMPLocThreadIDMap.end()) { 1462 ThreadID = I->second.ThreadID; 1463 if (ThreadID != nullptr) 1464 return ThreadID; 1465 } 1466 // If exceptions are enabled, do not use parameter to avoid possible crash. 1467 if (auto *OMPRegionInfo = 1468 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1469 if (OMPRegionInfo->getThreadIDVariable()) { 1470 // Check if this an outlined function with thread id passed as argument. 
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1539 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1540 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1541 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1542 } 1543 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1544 } 1545 1546 llvm::FunctionCallee 1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1548 assert((IVSize == 32 || IVSize == 64) && 1549 "IV size is not compatible with the omp runtime"); 1550 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1551 : "__kmpc_for_static_init_4u") 1552 : (IVSigned ? "__kmpc_for_static_init_8" 1553 : "__kmpc_for_static_init_8u"); 1554 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1555 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1556 llvm::Type *TypeParams[] = { 1557 getIdentTyPointerTy(), // loc 1558 CGM.Int32Ty, // tid 1559 CGM.Int32Ty, // schedtype 1560 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1561 PtrTy, // p_lower 1562 PtrTy, // p_upper 1563 PtrTy, // p_stride 1564 ITy, // incr 1565 ITy // chunk 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1579 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1581 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1582 CGM.Int32Ty, // tid 1583 CGM.Int32Ty, // schedtype 1584 ITy, // lower 1585 ITy, // upper 1586 ITy, // stride 1587 ITy // chunk 1588 }; 1589 auto *FnTy = 1590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1591 return CGM.CreateRuntimeFunction(FnTy, Name); 1592 } 1593 1594 llvm::FunctionCallee 1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1596 assert((IVSize == 32 || IVSize == 64) && 1597 "IV size is not compatible with the omp runtime"); 1598 StringRef Name = 1599 IVSize == 32 1600 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1601 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1602 llvm::Type *TypeParams[] = { 1603 getIdentTyPointerTy(), // loc 1604 CGM.Int32Ty, // tid 1605 }; 1606 auto *FnTy = 1607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1608 return CGM.CreateRuntimeFunction(FnTy, Name); 1609 } 1610 1611 llvm::FunctionCallee 1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1613 assert((IVSize == 32 || IVSize == 64) && 1614 "IV size is not compatible with the omp runtime"); 1615 StringRef Name = 1616 IVSize == 32 1617 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1618 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1619 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1621 llvm::Type *TypeParams[] = { 1622 getIdentTyPointerTy(), // loc 1623 CGM.Int32Ty, // tid 1624 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1625 PtrTy, // p_lower 1626 PtrTy, // p_upper 1627 PtrTy // p_stride 1628 }; 1629 auto *FnTy = 1630 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1631 return CGM.CreateRuntimeFunction(FnTy, Name); 1632 } 1633 1634 /// Obtain information that uniquely identifies a target entry. This 1635 /// consists of the file and device IDs as well as line number associated with 1636 /// the relevant entry source location. 1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1638 unsigned &DeviceID, unsigned &FileID, 1639 unsigned &LineNum) { 1640 SourceManager &SM = C.getSourceManager(); 1641 1642 // The loc should be always valid and have a file ID (the user cannot use 1643 // #pragma directives in macros) 1644 1645 assert(Loc.isValid() && "Source location is expected to be always valid."); 1646 1647 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1648 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1649 1650 llvm::sys::fs::UniqueID ID; 1651 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1652 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1653 << PLoc.getFilename() << EC.message(); 1654 1655 DeviceID = ID.getDevice(); 1656 FileID = ID.getFile(); 1657 LineNum = PLoc.getLine(); 1658 } 1659 1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1661 if (CGM.getLangOpts().OpenMPSimd) 1662 return Address::invalid(); 1663 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1664 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1665 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1666 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1667 HasRequiresUnifiedSharedMemory))) { 1668 SmallString<64> PtrName; 1669 { 1670 
llvm::raw_svector_ostream OS(PtrName); 1671 OS << CGM.getMangledName(GlobalDecl(VD)); 1672 if (!VD->isExternallyVisible()) { 1673 unsigned DeviceID, FileID, Line; 1674 getTargetEntryUniqueInfo(CGM.getContext(), 1675 VD->getCanonicalDecl()->getBeginLoc(), 1676 DeviceID, FileID, Line); 1677 OS << llvm::format("_%x", FileID); 1678 } 1679 OS << "_decl_tgt_ref_ptr"; 1680 } 1681 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1682 if (!Ptr) { 1683 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1684 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1685 PtrName); 1686 1687 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1688 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1689 1690 if (!CGM.getLangOpts().OpenMPIsDevice) 1691 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1692 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1693 } 1694 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1695 } 1696 return Address::invalid(); 1697 } 1698 1699 llvm::Constant * 1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1701 assert(!CGM.getLangOpts().OpenMPUseTLS || 1702 !CGM.getContext().getTargetInfo().isTLSSupported()); 1703 // Lookup the entry, lazily creating it if necessary. 
1704 std::string Suffix = getName({"cache", ""}); 1705 return getOrCreateInternalVariable( 1706 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1707 } 1708 1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1710 const VarDecl *VD, 1711 Address VDAddr, 1712 SourceLocation Loc) { 1713 if (CGM.getLangOpts().OpenMPUseTLS && 1714 CGM.getContext().getTargetInfo().isTLSSupported()) 1715 return VDAddr; 1716 1717 llvm::Type *VarTy = VDAddr.getElementType(); 1718 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1719 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1720 CGM.Int8PtrTy), 1721 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1722 getOrCreateThreadPrivateCache(VD)}; 1723 return Address(CGF.EmitRuntimeCall( 1724 OMPBuilder.getOrCreateRuntimeFunction( 1725 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1726 Args), 1727 VDAddr.getAlignment()); 1728 } 1729 1730 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1731 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1732 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1733 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1734 // library. 1735 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1736 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1737 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1738 OMPLoc); 1739 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1740 // to register constructor/destructor for variable. 
1741 llvm::Value *Args[] = { 1742 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1743 Ctor, CopyCtor, Dtor}; 1744 CGF.EmitRuntimeCall( 1745 OMPBuilder.getOrCreateRuntimeFunction( 1746 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1747 Args); 1748 } 1749 1750 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1751 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1752 bool PerformInit, CodeGenFunction *CGF) { 1753 if (CGM.getLangOpts().OpenMPUseTLS && 1754 CGM.getContext().getTargetInfo().isTLSSupported()) 1755 return nullptr; 1756 1757 VD = VD->getDefinition(CGM.getContext()); 1758 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1759 QualType ASTTy = VD->getType(); 1760 1761 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1762 const Expr *Init = VD->getAnyInitializer(); 1763 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1764 // Generate function that re-emits the declaration's initializer into the 1765 // threadprivate copy of the variable VD 1766 CodeGenFunction CtorCGF(CGM); 1767 FunctionArgList Args; 1768 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1769 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1770 ImplicitParamDecl::Other); 1771 Args.push_back(&Dst); 1772 1773 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1774 CGM.getContext().VoidPtrTy, Args); 1775 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1776 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1777 llvm::Function *Fn = 1778 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1779 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1780 Args, Loc, Loc); 1781 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1782 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1783 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1784 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1785 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1786 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1787 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1788 /*IsInitializer=*/true); 1789 ArgVal = CtorCGF.EmitLoadOfScalar( 1790 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1791 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1792 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1793 CtorCGF.FinishFunction(); 1794 Ctor = Fn; 1795 } 1796 if (VD->getType().isDestructedType() != QualType::DK_none) { 1797 // Generate function that emits destructor call for the threadprivate copy 1798 // of the variable VD 1799 CodeGenFunction DtorCGF(CGM); 1800 FunctionArgList Args; 1801 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1802 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1803 ImplicitParamDecl::Other); 1804 Args.push_back(&Dst); 1805 1806 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1807 CGM.getContext().VoidTy, Args); 1808 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1809 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1810 llvm::Function *Fn = 1811 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1812 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1813 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1814 Loc, Loc); 1815 // Create a scope with an artificial location for the body of this function. 1816 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1817 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1818 DtorCGF.GetAddrOfLocalVar(&Dst), 1819 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1820 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1821 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1822 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1823 DtorCGF.FinishFunction(); 1824 Dtor = Fn; 1825 } 1826 // Do not emit init function if it is not required. 
1827 if (!Ctor && !Dtor) 1828 return nullptr; 1829 1830 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1831 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1832 /*isVarArg=*/false) 1833 ->getPointerTo(); 1834 // Copying constructor for the threadprivate variable. 1835 // Must be NULL - reserved by runtime, but currently it requires that this 1836 // parameter is always NULL. Otherwise it fires assertion. 1837 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1838 if (Ctor == nullptr) { 1839 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1840 /*isVarArg=*/false) 1841 ->getPointerTo(); 1842 Ctor = llvm::Constant::getNullValue(CtorTy); 1843 } 1844 if (Dtor == nullptr) { 1845 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1846 /*isVarArg=*/false) 1847 ->getPointerTo(); 1848 Dtor = llvm::Constant::getNullValue(DtorTy); 1849 } 1850 if (!CGF) { 1851 auto *InitFunctionTy = 1852 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1853 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1854 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1855 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1856 CodeGenFunction InitCGF(CGM); 1857 FunctionArgList ArgList; 1858 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1859 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1860 Loc, Loc); 1861 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1862 InitCGF.FinishFunction(); 1863 return InitFunction; 1864 } 1865 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1866 } 1867 return nullptr; 1868 } 1869 1870 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1871 llvm::GlobalVariable *Addr, 1872 bool PerformInit) { 1873 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1874 !CGM.getLangOpts().OpenMPIsDevice) 1875 return false; 1876 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1877 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1878 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1879 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1880 HasRequiresUnifiedSharedMemory)) 1881 return CGM.getLangOpts().OpenMPIsDevice; 1882 VD = VD->getDefinition(CGM.getContext()); 1883 assert(VD && "Unknown VarDecl"); 1884 1885 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1886 return CGM.getLangOpts().OpenMPIsDevice; 1887 1888 QualType ASTTy = VD->getType(); 1889 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1890 1891 // Produce the unique prefix to identify the new target regions. We use 1892 // the source location of the variable declaration which we know to not 1893 // conflict with any target region. 1894 unsigned DeviceID; 1895 unsigned FileID; 1896 unsigned Line; 1897 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1898 SmallString<128> Buffer, Out; 1899 { 1900 llvm::raw_svector_ostream OS(Buffer); 1901 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1902 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1903 } 1904 1905 const Expr *Init = VD->getAnyInitializer(); 1906 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1907 llvm::Constant *Ctor; 1908 llvm::Constant *ID; 1909 if (CGM.getLangOpts().OpenMPIsDevice) { 1910 // Generate function that re-emits the declaration's initializer into 1911 // the threadprivate copy of the variable VD 1912 CodeGenFunction CtorCGF(CGM); 1913 1914 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1915 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1916 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1917 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1918 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1919 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1920 FunctionArgList(), Loc, Loc); 1921 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1922 CtorCGF.EmitAnyExprToMem(Init, 1923 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1924 Init->getType().getQualifiers(), 1925 /*IsInitializer=*/true); 1926 CtorCGF.FinishFunction(); 1927 Ctor = Fn; 1928 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1929 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1930 } else { 1931 Ctor = new llvm::GlobalVariable( 1932 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1933 llvm::GlobalValue::PrivateLinkage, 1934 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1935 ID = Ctor; 1936 } 1937 1938 // Register the information for the entry associated with the constructor. 1939 Out.clear(); 1940 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1941 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1942 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1943 } 1944 if (VD->getType().isDestructedType() != QualType::DK_none) { 1945 llvm::Constant *Dtor; 1946 llvm::Constant *ID; 1947 if (CGM.getLangOpts().OpenMPIsDevice) { 1948 // Generate function that emits destructor call for the threadprivate 1949 // copy of the variable VD 1950 CodeGenFunction DtorCGF(CGM); 1951 1952 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1953 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1954 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1955 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1956 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1957 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1958 FunctionArgList(), Loc, Loc); 1959 // Create a scope with an artificial location for the body of this 1960 // function. 
1961 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1962 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1963 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1964 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1965 DtorCGF.FinishFunction(); 1966 Dtor = Fn; 1967 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1968 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1969 } else { 1970 Dtor = new llvm::GlobalVariable( 1971 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1972 llvm::GlobalValue::PrivateLinkage, 1973 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1974 ID = Dtor; 1975 } 1976 // Register the information for the entry associated with the destructor. 1977 Out.clear(); 1978 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1979 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1980 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1981 } 1982 return CGM.getLangOpts().OpenMPIsDevice; 1983 } 1984 1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1986 QualType VarType, 1987 StringRef Name) { 1988 std::string Suffix = getName({"artificial", ""}); 1989 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1990 llvm::Value *GAddr = 1991 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1992 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1993 CGM.getTarget().isTLSSupported()) { 1994 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1995 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 1996 } 1997 std::string CacheSuffix = getName({"cache", ""}); 1998 llvm::Value *Args[] = { 1999 emitUpdateLocation(CGF, SourceLocation()), 2000 getThreadID(CGF, SourceLocation()), 2001 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2002 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2003 /*isSigned=*/false), 2004 
getOrCreateInternalVariable( 2005 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2006 return Address( 2007 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2008 CGF.EmitRuntimeCall( 2009 OMPBuilder.getOrCreateRuntimeFunction( 2010 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2011 Args), 2012 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2013 CGM.getContext().getTypeAlignInChars(VarType)); 2014 } 2015 2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2017 const RegionCodeGenTy &ThenGen, 2018 const RegionCodeGenTy &ElseGen) { 2019 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2020 2021 // If the condition constant folds and can be elided, try to avoid emitting 2022 // the condition and the dead arm of the if/else. 2023 bool CondConstant; 2024 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2025 if (CondConstant) 2026 ThenGen(CGF); 2027 else 2028 ElseGen(CGF); 2029 return; 2030 } 2031 2032 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2033 // emit the conditional branch. 2034 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2035 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2036 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2037 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2038 2039 // Emit the 'then' code. 2040 CGF.EmitBlock(ThenBlock); 2041 ThenGen(CGF); 2042 CGF.EmitBranch(ContBlock); 2043 // Emit the 'else' code if present. 2044 // There is no need to emit line number for unconditional branch. 2045 (void)ApplyDebugLocation::CreateEmpty(CGF); 2046 CGF.EmitBlock(ElseBlock); 2047 ElseGen(CGF); 2048 // There is no need to emit line number for unconditional branch. 2049 (void)ApplyDebugLocation::CreateEmpty(CGF); 2050 CGF.EmitBranch(ContBlock); 2051 // Emit the continuation block for code after the if. 
2052 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2053 } 2054 2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2056 llvm::Function *OutlinedFn, 2057 ArrayRef<llvm::Value *> CapturedVars, 2058 const Expr *IfCond) { 2059 if (!CGF.HaveInsertPoint()) 2060 return; 2061 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2062 auto &M = CGM.getModule(); 2063 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2064 this](CodeGenFunction &CGF, PrePostActionTy &) { 2065 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2066 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2067 llvm::Value *Args[] = { 2068 RTLoc, 2069 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2070 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2071 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2072 RealArgs.append(std::begin(Args), std::end(Args)); 2073 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2074 2075 llvm::FunctionCallee RTLFn = 2076 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2077 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2078 }; 2079 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2082 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2083 // Build calls: 2084 // __kmpc_serialized_parallel(&Loc, GTid); 2085 llvm::Value *Args[] = {RTLoc, ThreadID}; 2086 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2087 M, OMPRTL___kmpc_serialized_parallel), 2088 Args); 2089 2090 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2091 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2092 Address ZeroAddrBound = 2093 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2094 /*Name=*/".bound.zero.addr"); 2095 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2096 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2097 // ThreadId 
for serialized parallels is 0. 2098 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2099 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2100 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2101 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2102 2103 // __kmpc_end_serialized_parallel(&Loc, GTid); 2104 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2105 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2106 M, OMPRTL___kmpc_end_serialized_parallel), 2107 EndArgs); 2108 }; 2109 if (IfCond) { 2110 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2111 } else { 2112 RegionCodeGenTy ThenRCG(ThenGen); 2113 ThenRCG(CGF); 2114 } 2115 } 2116 2117 // If we're inside an (outlined) parallel region, use the region info's 2118 // thread-ID variable (it is passed in a first argument of the outlined function 2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2120 // regular serial code region, get thread ID by calling kmp_int32 2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2122 // return the address of that temp. 
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2124 SourceLocation Loc) { 2125 if (auto *OMPRegionInfo = 2126 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2127 if (OMPRegionInfo->getThreadIDVariable()) 2128 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2129 2130 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2131 QualType Int32Ty = 2132 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2133 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2134 CGF.EmitStoreOfScalar(ThreadID, 2135 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2136 2137 return ThreadIDTemp; 2138 } 2139 2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2141 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2142 SmallString<256> Buffer; 2143 llvm::raw_svector_ostream Out(Buffer); 2144 Out << Name; 2145 StringRef RuntimeName = Out.str(); 2146 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2147 if (Elem.second) { 2148 assert(Elem.second->getType()->getPointerElementType() == Ty && 2149 "OMP internal variable has different type than requested"); 2150 return &*Elem.second; 2151 } 2152 2153 return Elem.second = new llvm::GlobalVariable( 2154 CGM.getModule(), Ty, /*IsConstant*/ false, 2155 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2156 Elem.first(), /*InsertBefore=*/nullptr, 2157 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2158 } 2159 2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2161 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2162 std::string Name = getName({Prefix, "var"}); 2163 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2164 } 2165 2166 namespace { 2167 /// Common pre(post)-action for different OpenMP constructs. 
2168 class CommonActionTy final : public PrePostActionTy { 2169 llvm::FunctionCallee EnterCallee; 2170 ArrayRef<llvm::Value *> EnterArgs; 2171 llvm::FunctionCallee ExitCallee; 2172 ArrayRef<llvm::Value *> ExitArgs; 2173 bool Conditional; 2174 llvm::BasicBlock *ContBlock = nullptr; 2175 2176 public: 2177 CommonActionTy(llvm::FunctionCallee EnterCallee, 2178 ArrayRef<llvm::Value *> EnterArgs, 2179 llvm::FunctionCallee ExitCallee, 2180 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2181 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2182 ExitArgs(ExitArgs), Conditional(Conditional) {} 2183 void Enter(CodeGenFunction &CGF) override { 2184 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2185 if (Conditional) { 2186 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2187 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2188 ContBlock = CGF.createBasicBlock("omp_if.end"); 2189 // Generate the branch (If-stmt) 2190 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2191 CGF.EmitBlock(ThenBlock); 2192 } 2193 } 2194 void Done(CodeGenFunction &CGF) { 2195 // Emit the rest of blocks/branches 2196 CGF.EmitBranch(ContBlock); 2197 CGF.EmitBlock(ContBlock, true); 2198 } 2199 void Exit(CodeGenFunction &CGF) override { 2200 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2201 } 2202 }; 2203 } // anonymous namespace 2204 2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2206 StringRef CriticalName, 2207 const RegionCodeGenTy &CriticalOpGen, 2208 SourceLocation Loc, const Expr *Hint) { 2209 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2210 // CriticalOpGen(); 2211 // __kmpc_end_critical(ident_t *, gtid, Lock); 2212 // Prepare arguments and build a call to __kmpc_critical 2213 if (!CGF.HaveInsertPoint()) 2214 return; 2215 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2216 getCriticalRegionLock(CriticalName)}; 2217 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2218 std::end(Args)); 2219 if (Hint) { 2220 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2221 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2222 } 2223 CommonActionTy Action( 2224 OMPBuilder.getOrCreateRuntimeFunction( 2225 CGM.getModule(), 2226 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2227 EnterArgs, 2228 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2229 OMPRTL___kmpc_end_critical), 2230 Args); 2231 CriticalOpGen.setAction(Action); 2232 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2233 } 2234 2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2236 const RegionCodeGenTy &MasterOpGen, 2237 SourceLocation Loc) { 2238 if (!CGF.HaveInsertPoint()) 2239 return; 2240 // if(__kmpc_master(ident_t *, gtid)) { 2241 // MasterOpGen(); 2242 // __kmpc_end_master(ident_t *, gtid); 2243 // } 2244 // Prepare arguments and build a call to __kmpc_master 2245 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2246 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2247 CGM.getModule(), OMPRTL___kmpc_master), 2248 Args, 2249 OMPBuilder.getOrCreateRuntimeFunction( 2250 CGM.getModule(), OMPRTL___kmpc_end_master), 2251 Args, 2252 /*Conditional=*/true); 2253 MasterOpGen.setAction(Action); 2254 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2255 Action.Done(CGF); 2256 } 2257 2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2259 SourceLocation Loc) { 2260 if (!CGF.HaveInsertPoint()) 2261 return; 2262 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2263 OMPBuilder.CreateTaskyield(CGF.Builder); 2264 } else { 2265 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2266 llvm::Value *Args[] = { 2267 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2268 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2269 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2270 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2271 Args); 2272 } 2273 2274 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2275 Region->emitUntiedSwitch(CGF); 2276 } 2277 2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2279 const RegionCodeGenTy &TaskgroupOpGen, 2280 SourceLocation Loc) { 2281 if (!CGF.HaveInsertPoint()) 2282 return; 2283 // __kmpc_taskgroup(ident_t *, gtid); 2284 // TaskgroupOpGen(); 2285 // __kmpc_end_taskgroup(ident_t *, gtid); 2286 // Prepare arguments and build a call to __kmpc_taskgroup 2287 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2288 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2289 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2290 Args, 2291 OMPBuilder.getOrCreateRuntimeFunction( 2292 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2293 Args); 2294 TaskgroupOpGen.setAction(Action); 2295 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2296 } 2297 2298 /// Given an array of pointers to variables, project the address of a 2299 /// given variable. 2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2301 unsigned Index, const VarDecl *Var) { 2302 // Pull out the pointer to the variable. 
2303 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2304 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2305 2306 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2307 Addr = CGF.Builder.CreateElementBitCast( 2308 Addr, CGF.ConvertTypeForMem(Var->getType())); 2309 return Addr; 2310 } 2311 2312 static llvm::Value *emitCopyprivateCopyFunction( 2313 CodeGenModule &CGM, llvm::Type *ArgsType, 2314 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2315 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2316 SourceLocation Loc) { 2317 ASTContext &C = CGM.getContext(); 2318 // void copy_func(void *LHSArg, void *RHSArg); 2319 FunctionArgList Args; 2320 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2321 ImplicitParamDecl::Other); 2322 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2323 ImplicitParamDecl::Other); 2324 Args.push_back(&LHSArg); 2325 Args.push_back(&RHSArg); 2326 const auto &CGFI = 2327 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2328 std::string Name = 2329 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2330 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2331 llvm::GlobalValue::InternalLinkage, Name, 2332 &CGM.getModule()); 2333 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2334 Fn->setDoesNotRecurse(); 2335 CodeGenFunction CGF(CGM); 2336 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2337 // Dest = (void*[n])(LHSArg); 2338 // Src = (void*[n])(RHSArg); 2339 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2340 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2341 ArgsType), CGF.getPointerAlign()); 2342 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2343 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2344 ArgsType), CGF.getPointerAlign()); 2345 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 2346 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2347 // ... 2348 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2349 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2350 const auto *DestVar = 2351 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2352 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2353 2354 const auto *SrcVar = 2355 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2356 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2357 2358 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2359 QualType Type = VD->getType(); 2360 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2361 } 2362 CGF.FinishFunction(); 2363 return Fn; 2364 } 2365 2366 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2367 const RegionCodeGenTy &SingleOpGen, 2368 SourceLocation Loc, 2369 ArrayRef<const Expr *> CopyprivateVars, 2370 ArrayRef<const Expr *> SrcExprs, 2371 ArrayRef<const Expr *> DstExprs, 2372 ArrayRef<const Expr *> AssignmentOps) { 2373 if (!CGF.HaveInsertPoint()) 2374 return; 2375 assert(CopyprivateVars.size() == SrcExprs.size() && 2376 CopyprivateVars.size() == DstExprs.size() && 2377 CopyprivateVars.size() == AssignmentOps.size()); 2378 ASTContext &C = CGM.getContext(); 2379 // int32 did_it = 0; 2380 // if(__kmpc_single(ident_t *, gtid)) { 2381 // SingleOpGen(); 2382 // __kmpc_end_single(ident_t *, gtid); 2383 // did_it = 1; 2384 // } 2385 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2386 // <copy_func>, did_it); 2387 2388 Address DidIt = Address::invalid(); 2389 if (!CopyprivateVars.empty()) { 2390 // int32 did_it = 0; 2391 QualType KmpInt32Ty = 2392 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2393 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2394 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2395 } 2396 // Prepare arguments and build a call to __kmpc_single 2397 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2398 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2399 CGM.getModule(), OMPRTL___kmpc_single), 2400 Args, 2401 OMPBuilder.getOrCreateRuntimeFunction( 2402 CGM.getModule(), OMPRTL___kmpc_end_single), 2403 Args, 2404 /*Conditional=*/true); 2405 SingleOpGen.setAction(Action); 2406 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2407 if (DidIt.isValid()) { 2408 // did_it = 1; 2409 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2410 } 2411 Action.Done(CGF); 2412 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2413 // <copy_func>, did_it); 2414 if (DidIt.isValid()) { 2415 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2416 QualType CopyprivateArrayTy = C.getConstantArrayType( 2417 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2418 /*IndexTypeQuals=*/0); 2419 // Create a list of all private variables for copyprivate. 2420 Address CopyprivateList = 2421 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2422 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2423 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2424 CGF.Builder.CreateStore( 2425 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2426 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2427 CGF.VoidPtrTy), 2428 Elem); 2429 } 2430 // Build function that copies private values from single region to all other 2431 // threads in the corresponding parallel region. 
2432 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2433 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2434 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2435 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2436 Address CL = 2437 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2438 CGF.VoidPtrTy); 2439 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2440 llvm::Value *Args[] = { 2441 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2442 getThreadID(CGF, Loc), // i32 <gtid> 2443 BufSize, // size_t <buf_size> 2444 CL.getPointer(), // void *<copyprivate list> 2445 CpyFn, // void (*) (void *, void *) <copy_func> 2446 DidItVal // i32 did_it 2447 }; 2448 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2449 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2450 Args); 2451 } 2452 } 2453 2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2455 const RegionCodeGenTy &OrderedOpGen, 2456 SourceLocation Loc, bool IsThreads) { 2457 if (!CGF.HaveInsertPoint()) 2458 return; 2459 // __kmpc_ordered(ident_t *, gtid); 2460 // OrderedOpGen(); 2461 // __kmpc_end_ordered(ident_t *, gtid); 2462 // Prepare arguments and build a call to __kmpc_ordered 2463 if (IsThreads) { 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_ordered), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2470 Args); 2471 OrderedOpGen.setAction(Action); 2472 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2473 return; 2474 } 2475 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2476 } 2477 2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2479 unsigned Flags; 2480 if (Kind == OMPD_for) 2481 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2482 else if (Kind == OMPD_sections) 2483 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2484 else if (Kind == OMPD_single) 2485 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2486 else if (Kind == OMPD_barrier) 2487 Flags = OMP_IDENT_BARRIER_EXPL; 2488 else 2489 Flags = OMP_IDENT_BARRIER_IMPL; 2490 return Flags; 2491 } 2492 2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2494 CodeGenFunction &CGF, const OMPLoopDirective &S, 2495 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2496 // Check if the loop directive is actually a doacross loop directive. In this 2497 // case choose static, 1 schedule. 2498 if (llvm::any_of( 2499 S.getClausesOfKind<OMPOrderedClause>(), 2500 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2501 ScheduleKind = OMPC_SCHEDULE_static; 2502 // Chunk size is 1 in this case. 2503 llvm::APInt ChunkSize(32, 1); 2504 ChunkExpr = IntegerLiteral::Create( 2505 CGF.getContext(), ChunkSize, 2506 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2507 SourceLocation()); 2508 } 2509 } 2510 2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2512 OpenMPDirectiveKind Kind, bool EmitChecks, 2513 bool ForceSimpleCall) { 2514 // Check if we should use the OMPBuilder 2515 auto *OMPRegionInfo = 2516 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2517 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2518 CGF.Builder.restoreIP(OMPBuilder.CreateBarrier( 2519 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2520 return; 2521 } 2522 2523 if (!CGF.HaveInsertPoint()) 2524 return; 2525 // Build call __kmpc_cancel_barrier(loc, thread_id); 2526 // Build call __kmpc_barrier(loc, thread_id); 2527 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2528 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2529 // thread_id); 2530 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2531 getThreadID(CGF, Loc)}; 2532 if (OMPRegionInfo) { 2533 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2534 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2535 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2536 OMPRTL___kmpc_cancel_barrier), 2537 Args); 2538 if (EmitChecks) { 2539 // if (__kmpc_cancel_barrier()) { 2540 // exit from construct; 2541 // } 2542 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2543 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2544 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2545 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2546 CGF.EmitBlock(ExitBB); 2547 // exit from construct; 2548 CodeGenFunction::JumpDest CancelDestination = 2549 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2550 CGF.EmitBranchThroughCleanup(CancelDestination); 2551 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2552 } 2553 return; 2554 } 2555 } 2556 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2557 CGM.getModule(), OMPRTL___kmpc_barrier), 2558 Args); 2559 } 2560 2561 /// Map the OpenMP loop schedule to the runtime enumeration. 2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2563 bool Chunked, bool Ordered) { 2564 switch (ScheduleKind) { 2565 case OMPC_SCHEDULE_static: 2566 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2567 : (Ordered ? OMP_ord_static : OMP_sch_static); 2568 case OMPC_SCHEDULE_dynamic: 2569 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2570 case OMPC_SCHEDULE_guided: 2571 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2572 case OMPC_SCHEDULE_runtime: 2573 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2574 case OMPC_SCHEDULE_auto: 2575 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2576 case OMPC_SCHEDULE_unknown: 2577 assert(!Chunked && "chunk was specified but schedule kind not known"); 2578 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2579 } 2580 llvm_unreachable("Unexpected runtime schedule"); 2581 } 2582 2583 /// Map the OpenMP distribute schedule to the runtime enumeration. 2584 static OpenMPSchedType 2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2586 // only static is allowed for dist_schedule 2587 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2588 } 2589 2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2591 bool Chunked) const { 2592 OpenMPSchedType Schedule = 2593 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2594 return Schedule == OMP_sch_static; 2595 } 2596 2597 bool CGOpenMPRuntime::isStaticNonchunked( 2598 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2599 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2600 return Schedule == OMP_dist_sch_static; 2601 } 2602 2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2604 bool Chunked) const { 2605 OpenMPSchedType Schedule = 2606 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2607 return Schedule == OMP_sch_static_chunked; 2608 } 2609 2610 bool CGOpenMPRuntime::isStaticChunked( 2611 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2612 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2613 return Schedule == OMP_dist_sch_static_chunked; 2614 } 2615 2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2617 OpenMPSchedType Schedule = 2618 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2619 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2620 return Schedule != OMP_sch_static; 2621 } 2622 2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2624 OpenMPScheduleClauseModifier M1, 2625 OpenMPScheduleClauseModifier M2) { 2626 int Modifier = 0; 2627 switch (M1) { 2628 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2629 Modifier = OMP_sch_modifier_monotonic; 2630 break; 2631 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2632 Modifier = OMP_sch_modifier_nonmonotonic; 2633 break; 2634 case OMPC_SCHEDULE_MODIFIER_simd: 2635 if (Schedule == OMP_sch_static_chunked) 2636 Schedule = OMP_sch_static_balanced_chunked; 2637 break; 2638 case OMPC_SCHEDULE_MODIFIER_last: 2639 case OMPC_SCHEDULE_MODIFIER_unknown: 2640 break; 2641 } 2642 switch (M2) { 2643 case OMPC_SCHEDULE_MODIFIER_monotonic: 2644 Modifier = OMP_sch_modifier_monotonic; 2645 break; 2646 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2647 Modifier = OMP_sch_modifier_nonmonotonic; 2648 break; 2649 case OMPC_SCHEDULE_MODIFIER_simd: 2650 if (Schedule == OMP_sch_static_chunked) 2651 Schedule = OMP_sch_static_balanced_chunked; 2652 break; 2653 case OMPC_SCHEDULE_MODIFIER_last: 2654 case OMPC_SCHEDULE_MODIFIER_unknown: 2655 break; 2656 } 2657 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2658 // If the static schedule kind is specified or if the ordered clause is 2659 // specified, and if the nonmonotonic modifier is not specified, the effect is 2660 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2661 // modifier is specified, the effect is as if the nonmonotonic modifier is 2662 // specified. 
2663 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2664 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2665 Schedule == OMP_sch_static_balanced_chunked || 2666 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2667 Schedule == OMP_dist_sch_static_chunked || 2668 Schedule == OMP_dist_sch_static)) 2669 Modifier = OMP_sch_modifier_nonmonotonic; 2670 } 2671 return Schedule | Modifier; 2672 } 2673 2674 void CGOpenMPRuntime::emitForDispatchInit( 2675 CodeGenFunction &CGF, SourceLocation Loc, 2676 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2677 bool Ordered, const DispatchRTInput &DispatchValues) { 2678 if (!CGF.HaveInsertPoint()) 2679 return; 2680 OpenMPSchedType Schedule = getRuntimeSchedule( 2681 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2682 assert(Ordered || 2683 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2684 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2685 Schedule != OMP_sch_static_balanced_chunked)); 2686 // Call __kmpc_dispatch_init( 2687 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2688 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2689 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2690 2691 // If the Chunk was not specified in the clause - use default value 1. 2692 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2693 : CGF.Builder.getIntN(IVSize, 1); 2694 llvm::Value *Args[] = { 2695 emitUpdateLocation(CGF, Loc), 2696 getThreadID(CGF, Loc), 2697 CGF.Builder.getInt32(addMonoNonMonoModifier( 2698 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2699 DispatchValues.LB, // Lower 2700 DispatchValues.UB, // Upper 2701 CGF.Builder.getIntN(IVSize, 1), // Stride 2702 Chunk // Chunk 2703 }; 2704 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2705 } 2706 2707 static void emitForStaticInitCall( 2708 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2709 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2710 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2711 const CGOpenMPRuntime::StaticRTInput &Values) { 2712 if (!CGF.HaveInsertPoint()) 2713 return; 2714 2715 assert(!Values.Ordered); 2716 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2717 Schedule == OMP_sch_static_balanced_chunked || 2718 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2719 Schedule == OMP_dist_sch_static || 2720 Schedule == OMP_dist_sch_static_chunked); 2721 2722 // Call __kmpc_for_static_init( 2723 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2724 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2725 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2726 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2727 llvm::Value *Chunk = Values.Chunk; 2728 if (Chunk == nullptr) { 2729 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2730 Schedule == OMP_dist_sch_static) && 2731 "expected static non-chunked schedule"); 2732 // If the Chunk was not specified in the clause - use default value 1. 
2733 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2734 } else { 2735 assert((Schedule == OMP_sch_static_chunked || 2736 Schedule == OMP_sch_static_balanced_chunked || 2737 Schedule == OMP_ord_static_chunked || 2738 Schedule == OMP_dist_sch_static_chunked) && 2739 "expected static chunked schedule"); 2740 } 2741 llvm::Value *Args[] = { 2742 UpdateLocation, 2743 ThreadId, 2744 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2745 M2)), // Schedule type 2746 Values.IL.getPointer(), // &isLastIter 2747 Values.LB.getPointer(), // &LB 2748 Values.UB.getPointer(), // &UB 2749 Values.ST.getPointer(), // &Stride 2750 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2751 Chunk // Chunk 2752 }; 2753 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2754 } 2755 2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2757 SourceLocation Loc, 2758 OpenMPDirectiveKind DKind, 2759 const OpenMPScheduleTy &ScheduleKind, 2760 const StaticRTInput &Values) { 2761 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2762 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2763 assert(isOpenMPWorksharingDirective(DKind) && 2764 "Expected loop-based or sections-based directive."); 2765 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2766 isOpenMPLoopDirective(DKind) 2767 ? 
OMP_IDENT_WORK_LOOP 2768 : OMP_IDENT_WORK_SECTIONS); 2769 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2770 llvm::FunctionCallee StaticInitFunction = 2771 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2772 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2773 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2774 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2775 } 2776 2777 void CGOpenMPRuntime::emitDistributeStaticInit( 2778 CodeGenFunction &CGF, SourceLocation Loc, 2779 OpenMPDistScheduleClauseKind SchedKind, 2780 const CGOpenMPRuntime::StaticRTInput &Values) { 2781 OpenMPSchedType ScheduleNum = 2782 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2783 llvm::Value *UpdatedLocation = 2784 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2785 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2786 llvm::FunctionCallee StaticInitFunction = 2787 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2788 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2789 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2790 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2791 } 2792 2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2794 SourceLocation Loc, 2795 OpenMPDirectiveKind DKind) { 2796 if (!CGF.HaveInsertPoint()) 2797 return; 2798 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2799 llvm::Value *Args[] = { 2800 emitUpdateLocation(CGF, Loc, 2801 isOpenMPDistributeDirective(DKind) 2802 ? OMP_IDENT_WORK_DISTRIBUTE 2803 : isOpenMPLoopDirective(DKind) 2804 ? 
OMP_IDENT_WORK_LOOP 2805 : OMP_IDENT_WORK_SECTIONS), 2806 getThreadID(CGF, Loc)}; 2807 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2808 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2809 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2810 Args); 2811 } 2812 2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2814 SourceLocation Loc, 2815 unsigned IVSize, 2816 bool IVSigned) { 2817 if (!CGF.HaveInsertPoint()) 2818 return; 2819 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2820 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2821 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2822 } 2823 2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2825 SourceLocation Loc, unsigned IVSize, 2826 bool IVSigned, Address IL, 2827 Address LB, Address UB, 2828 Address ST) { 2829 // Call __kmpc_dispatch_next( 2830 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2831 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2832 // kmp_int[32|64] *p_stride); 2833 llvm::Value *Args[] = { 2834 emitUpdateLocation(CGF, Loc), 2835 getThreadID(CGF, Loc), 2836 IL.getPointer(), // &isLastIter 2837 LB.getPointer(), // &Lower 2838 UB.getPointer(), // &Upper 2839 ST.getPointer() // &Stride 2840 }; 2841 llvm::Value *Call = 2842 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2843 return CGF.EmitScalarConversion( 2844 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2845 CGF.getContext().BoolTy, Loc); 2846 } 2847 2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2849 llvm::Value *NumThreads, 2850 SourceLocation Loc) { 2851 if (!CGF.HaveInsertPoint()) 2852 return; 2853 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2854 llvm::Value *Args[] = { 2855 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2856 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2857 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2858 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2859 Args); 2860 } 2861 2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2863 ProcBindKind ProcBind, 2864 SourceLocation Loc) { 2865 if (!CGF.HaveInsertPoint()) 2866 return; 2867 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2868 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2869 llvm::Value *Args[] = { 2870 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2871 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2872 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2873 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2874 Args); 2875 } 2876 2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2878 SourceLocation Loc, llvm::AtomicOrdering AO) { 2879 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2880 OMPBuilder.CreateFlush(CGF.Builder); 2881 } else { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Build call void __kmpc_flush(ident_t *loc) 2885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2886 CGM.getModule(), OMPRTL___kmpc_flush), 2887 emitUpdateLocation(CGF, Loc)); 2888 } 2889 } 2890 2891 namespace { 2892 /// Indexes of fields for type kmp_task_t. 2893 enum KmpTaskTFields { 2894 /// List of shared variables. 2895 KmpTaskTShareds, 2896 /// Task routine. 2897 KmpTaskTRoutine, 2898 /// Partition id for the untied tasks. 2899 KmpTaskTPartId, 2900 /// Function with call of destructors for private variables. 2901 Data1, 2902 /// Task priority. 2903 Data2, 2904 /// (Taskloops only) Lower bound. 2905 KmpTaskTLowerBound, 2906 /// (Taskloops only) Upper bound. 2907 KmpTaskTUpperBound, 2908 /// (Taskloops only) Stride. 2909 KmpTaskTStride, 2910 /// (Taskloops only) Is last iteration flag. 2911 KmpTaskTLastIter, 2912 /// (Taskloops only) Reduction data. 
2913 KmpTaskTReductions, 2914 }; 2915 } // anonymous namespace 2916 2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2918 return OffloadEntriesTargetRegion.empty() && 2919 OffloadEntriesDeviceGlobalVar.empty(); 2920 } 2921 2922 /// Initialize target region entry. 2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2924 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2925 StringRef ParentName, unsigned LineNum, 2926 unsigned Order) { 2927 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2928 "only required for the device " 2929 "code generation."); 2930 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2931 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2932 OMPTargetRegionEntryTargetRegion); 2933 ++OffloadingEntriesNum; 2934 } 2935 2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2937 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2938 StringRef ParentName, unsigned LineNum, 2939 llvm::Constant *Addr, llvm::Constant *ID, 2940 OMPTargetRegionEntryKind Flags) { 2941 // If we are emitting code for a target, the entry is already initialized, 2942 // only has to be registered. 
2943 if (CGM.getLangOpts().OpenMPIsDevice) { 2944 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 2945 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2946 DiagnosticsEngine::Error, 2947 "Unable to find target region on line '%0' in the device code."); 2948 CGM.getDiags().Report(DiagID) << LineNum; 2949 return; 2950 } 2951 auto &Entry = 2952 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2953 assert(Entry.isValid() && "Entry not initialized!"); 2954 Entry.setAddress(Addr); 2955 Entry.setID(ID); 2956 Entry.setFlags(Flags); 2957 } else { 2958 if (Flags == 2959 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2960 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2961 /*IgnoreAddressId*/ true)) 2962 return; 2963 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2964 "Target region entry already registered!"); 2965 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2966 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2967 ++OffloadingEntriesNum; 2968 } 2969 } 2970 2971 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2972 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2973 bool IgnoreAddressId) const { 2974 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2975 if (PerDevice == OffloadEntriesTargetRegion.end()) 2976 return false; 2977 auto PerFile = PerDevice->second.find(FileID); 2978 if (PerFile == PerDevice->second.end()) 2979 return false; 2980 auto PerParentName = PerFile->second.find(ParentName); 2981 if (PerParentName == PerFile->second.end()) 2982 return false; 2983 auto PerLine = PerParentName->second.find(LineNum); 2984 if (PerLine == PerParentName->second.end()) 2985 return false; 2986 // Fail if this entry is already registered. 
2987 if (!IgnoreAddressId && 2988 (PerLine->second.getAddress() || PerLine->second.getID())) 2989 return false; 2990 return true; 2991 } 2992 2993 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2994 const OffloadTargetRegionEntryInfoActTy &Action) { 2995 // Scan all target region entries and perform the provided action. 2996 for (const auto &D : OffloadEntriesTargetRegion) 2997 for (const auto &F : D.second) 2998 for (const auto &P : F.second) 2999 for (const auto &L : P.second) 3000 Action(D.first, F.first, P.first(), L.first, L.second); 3001 } 3002 3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3004 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3005 OMPTargetGlobalVarEntryKind Flags, 3006 unsigned Order) { 3007 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3008 "only required for the device " 3009 "code generation."); 3010 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3011 ++OffloadingEntriesNum; 3012 } 3013 3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3015 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3016 CharUnits VarSize, 3017 OMPTargetGlobalVarEntryKind Flags, 3018 llvm::GlobalValue::LinkageTypes Linkage) { 3019 if (CGM.getLangOpts().OpenMPIsDevice) { 3020 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3021 assert(Entry.isValid() && Entry.getFlags() == Flags && 3022 "Entry not initialized!"); 3023 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3024 "Resetting with the new address."); 3025 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3026 if (Entry.getVarSize().isZero()) { 3027 Entry.setVarSize(VarSize); 3028 Entry.setLinkage(Linkage); 3029 } 3030 return; 3031 } 3032 Entry.setVarSize(VarSize); 3033 Entry.setLinkage(Linkage); 3034 Entry.setAddress(Addr); 3035 } else { 3036 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3037 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3038 
assert(Entry.isValid() && Entry.getFlags() == Flags && 3039 "Entry not initialized!"); 3040 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3041 "Resetting with the new address."); 3042 if (Entry.getVarSize().isZero()) { 3043 Entry.setVarSize(VarSize); 3044 Entry.setLinkage(Linkage); 3045 } 3046 return; 3047 } 3048 OffloadEntriesDeviceGlobalVar.try_emplace( 3049 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3050 ++OffloadingEntriesNum; 3051 } 3052 } 3053 3054 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3055 actOnDeviceGlobalVarEntriesInfo( 3056 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3057 // Scan all target region entries and perform the provided action. 3058 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3059 Action(E.getKey(), E.getValue()); 3060 } 3061 3062 void CGOpenMPRuntime::createOffloadEntry( 3063 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3064 llvm::GlobalValue::LinkageTypes Linkage) { 3065 StringRef Name = Addr->getName(); 3066 llvm::Module &M = CGM.getModule(); 3067 llvm::LLVMContext &C = M.getContext(); 3068 3069 // Create constant string with the name. 
3070 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3071 3072 std::string StringName = getName({"omp_offloading", "entry_name"}); 3073 auto *Str = new llvm::GlobalVariable( 3074 M, StrPtrInit->getType(), /*isConstant=*/true, 3075 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3076 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3077 3078 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3079 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3080 llvm::ConstantInt::get(CGM.SizeTy, Size), 3081 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3082 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3083 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3084 llvm::GlobalVariable *Entry = createGlobalStruct( 3085 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3086 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3087 3088 // The entry has to be created in the section the linker expects it to be. 3089 Entry->setSection("omp_offloading_entries"); 3090 } 3091 3092 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3093 // Emit the offloading entries and metadata so that the device codegen side 3094 // can easily figure out what to emit. The produced metadata looks like 3095 // this: 3096 // 3097 // !omp_offload.info = !{!1, ...} 3098 // 3099 // Right now we only generate metadata for function that contain target 3100 // regions. 3101 3102 // If we are in simd mode or there are no entries, we don't need to do 3103 // anything. 
3104 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3105 return; 3106 3107 llvm::Module &M = CGM.getModule(); 3108 llvm::LLVMContext &C = M.getContext(); 3109 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3110 SourceLocation, StringRef>, 3111 16> 3112 OrderedEntries(OffloadEntriesInfoManager.size()); 3113 llvm::SmallVector<StringRef, 16> ParentFunctions( 3114 OffloadEntriesInfoManager.size()); 3115 3116 // Auxiliary methods to create metadata values and strings. 3117 auto &&GetMDInt = [this](unsigned V) { 3118 return llvm::ConstantAsMetadata::get( 3119 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3120 }; 3121 3122 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3123 3124 // Create the offloading info metadata node. 3125 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3126 3127 // Create function that emits metadata for each target region entry; 3128 auto &&TargetRegionMetadataEmitter = 3129 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3130 &GetMDString]( 3131 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3132 unsigned Line, 3133 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3134 // Generate metadata for target regions. Each entry of this metadata 3135 // contains: 3136 // - Entry 0 -> Kind of this type of metadata (0). 3137 // - Entry 1 -> Device ID of the file where the entry was identified. 3138 // - Entry 2 -> File ID of the file where the entry was identified. 3139 // - Entry 3 -> Mangled name of the function where the entry was 3140 // identified. 3141 // - Entry 4 -> Line in the file where the entry was identified. 3142 // - Entry 5 -> Order the entry was created. 3143 // The first element of the metadata node is the kind. 
3144 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3145 GetMDInt(FileID), GetMDString(ParentName), 3146 GetMDInt(Line), GetMDInt(E.getOrder())}; 3147 3148 SourceLocation Loc; 3149 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3150 E = CGM.getContext().getSourceManager().fileinfo_end(); 3151 I != E; ++I) { 3152 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3153 I->getFirst()->getUniqueID().getFile() == FileID) { 3154 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3155 I->getFirst(), Line, 1); 3156 break; 3157 } 3158 } 3159 // Save this entry in the right position of the ordered entries array. 3160 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3161 ParentFunctions[E.getOrder()] = ParentName; 3162 3163 // Add metadata to the named metadata node. 3164 MD->addOperand(llvm::MDNode::get(C, Ops)); 3165 }; 3166 3167 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3168 TargetRegionMetadataEmitter); 3169 3170 // Create function that emits metadata for each device global variable entry; 3171 auto &&DeviceGlobalVarMetadataEmitter = 3172 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3173 MD](StringRef MangledName, 3174 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3175 &E) { 3176 // Generate metadata for global variables. Each entry of this metadata 3177 // contains: 3178 // - Entry 0 -> Kind of this type of metadata (1). 3179 // - Entry 1 -> Mangled name of the variable. 3180 // - Entry 2 -> Declare target kind. 3181 // - Entry 3 -> Order the entry was created. 3182 // The first element of the metadata node is the kind. 3183 llvm::Metadata *Ops[] = { 3184 GetMDInt(E.getKind()), GetMDString(MangledName), 3185 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3186 3187 // Save this entry in the right position of the ordered entries array. 
3188 OrderedEntries[E.getOrder()] = 3189 std::make_tuple(&E, SourceLocation(), MangledName); 3190 3191 // Add metadata to the named metadata node. 3192 MD->addOperand(llvm::MDNode::get(C, Ops)); 3193 }; 3194 3195 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3196 DeviceGlobalVarMetadataEmitter); 3197 3198 for (const auto &E : OrderedEntries) { 3199 assert(std::get<0>(E) && "All ordered entries must exist!"); 3200 if (const auto *CE = 3201 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3202 std::get<0>(E))) { 3203 if (!CE->getID() || !CE->getAddress()) { 3204 // Do not blame the entry if the parent funtion is not emitted. 3205 StringRef FnName = ParentFunctions[CE->getOrder()]; 3206 if (!CGM.GetGlobalValue(FnName)) 3207 continue; 3208 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3209 DiagnosticsEngine::Error, 3210 "Offloading entry for target region in %0 is incorrect: either the " 3211 "address or the ID is invalid."); 3212 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3213 continue; 3214 } 3215 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3216 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3217 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3218 OffloadEntryInfoDeviceGlobalVar>( 3219 std::get<0>(E))) { 3220 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3221 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3222 CE->getFlags()); 3223 switch (Flags) { 3224 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3225 if (CGM.getLangOpts().OpenMPIsDevice && 3226 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3227 continue; 3228 if (!CE->getAddress()) { 3229 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3230 DiagnosticsEngine::Error, "Offloading entry for declare target " 3231 "variable %0 is incorrect: the " 3232 "address is invalid."); 3233 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3234 
continue; 3235 } 3236 // The vaiable has no definition - no need to add the entry. 3237 if (CE->getVarSize().isZero()) 3238 continue; 3239 break; 3240 } 3241 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3242 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3243 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3244 "Declaret target link address is set."); 3245 if (CGM.getLangOpts().OpenMPIsDevice) 3246 continue; 3247 if (!CE->getAddress()) { 3248 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3249 DiagnosticsEngine::Error, 3250 "Offloading entry for declare target variable is incorrect: the " 3251 "address is invalid."); 3252 CGM.getDiags().Report(DiagID); 3253 continue; 3254 } 3255 break; 3256 } 3257 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3258 CE->getVarSize().getQuantity(), Flags, 3259 CE->getLinkage()); 3260 } else { 3261 llvm_unreachable("Unsupported entry kind."); 3262 } 3263 } 3264 } 3265 3266 /// Loads all the offload entries information from the host IR 3267 /// metadata. 3268 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3269 // If we are in target mode, load the metadata from the host IR. This code has 3270 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3271 3272 if (!CGM.getLangOpts().OpenMPIsDevice) 3273 return; 3274 3275 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3276 return; 3277 3278 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3279 if (auto EC = Buf.getError()) { 3280 CGM.getDiags().Report(diag::err_cannot_open_file) 3281 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3282 return; 3283 } 3284 3285 llvm::LLVMContext C; 3286 auto ME = expectedToErrorOrAndEmitErrors( 3287 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3288 3289 if (auto EC = ME.getError()) { 3290 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3291 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3292 CGM.getDiags().Report(DiagID) 3293 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3294 return; 3295 } 3296 3297 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3298 if (!MD) 3299 return; 3300 3301 for (llvm::MDNode *MN : MD->operands()) { 3302 auto &&GetMDInt = [MN](unsigned Idx) { 3303 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3304 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3305 }; 3306 3307 auto &&GetMDString = [MN](unsigned Idx) { 3308 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3309 return V->getString(); 3310 }; 3311 3312 switch (GetMDInt(0)) { 3313 default: 3314 llvm_unreachable("Unexpected metadata!"); 3315 break; 3316 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3317 OffloadingEntryInfoTargetRegion: 3318 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3319 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3320 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3321 /*Order=*/GetMDInt(5)); 3322 break; 3323 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3324 OffloadingEntryInfoDeviceGlobalVar: 3325 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3326 /*MangledName=*/GetMDString(1), 3327 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3328 /*Flags=*/GetMDInt(2)), 3329 /*Order=*/GetMDInt(3)); 3330 break; 3331 } 3332 } 3333 } 3334 3335 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3336 if (!KmpRoutineEntryPtrTy) { 3337 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3338 ASTContext &C = CGM.getContext(); 3339 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3340 FunctionProtoType::ExtProtoInfo EPI; 3341 KmpRoutineEntryPtrQTy = C.getPointerType( 3342 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3343 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3344 } 3345 } 3346 3347 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3348 // Make sure the type of the entry is already created. This is the type we 3349 // have to create: 3350 // struct __tgt_offload_entry{ 3351 // void *addr; // Pointer to the offload entry info. 3352 // // (function or global) 3353 // char *name; // Name of the function or global. 3354 // size_t size; // Size of the entry info (0 if it a function). 3355 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3356 // int32_t reserved; // Reserved, to use by the runtime library. 
3357 // }; 3358 if (TgtOffloadEntryQTy.isNull()) { 3359 ASTContext &C = CGM.getContext(); 3360 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3361 RD->startDefinition(); 3362 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3363 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3364 addFieldToRecordDecl(C, RD, C.getSizeType()); 3365 addFieldToRecordDecl( 3366 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3367 addFieldToRecordDecl( 3368 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3369 RD->completeDefinition(); 3370 RD->addAttr(PackedAttr::CreateImplicit(C)); 3371 TgtOffloadEntryQTy = C.getRecordType(RD); 3372 } 3373 return TgtOffloadEntryQTy; 3374 } 3375 3376 namespace { 3377 struct PrivateHelpersTy { 3378 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3379 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3380 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3381 PrivateElemInit(PrivateElemInit) {} 3382 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3383 const Expr *OriginalRef = nullptr; 3384 const VarDecl *Original = nullptr; 3385 const VarDecl *PrivateCopy = nullptr; 3386 const VarDecl *PrivateElemInit = nullptr; 3387 bool isLocalPrivate() const { 3388 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3389 } 3390 }; 3391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3392 } // anonymous namespace 3393 3394 static bool isAllocatableDecl(const VarDecl *VD) { 3395 const VarDecl *CVD = VD->getCanonicalDecl(); 3396 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3397 return false; 3398 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3399 // Use the default allocation. 
3400 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3401 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3402 !AA->getAllocator()); 3403 } 3404 3405 static RecordDecl * 3406 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3407 if (!Privates.empty()) { 3408 ASTContext &C = CGM.getContext(); 3409 // Build struct .kmp_privates_t. { 3410 // /* private vars */ 3411 // }; 3412 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3413 RD->startDefinition(); 3414 for (const auto &Pair : Privates) { 3415 const VarDecl *VD = Pair.second.Original; 3416 QualType Type = VD->getType().getNonReferenceType(); 3417 // If the private variable is a local variable with lvalue ref type, 3418 // allocate the pointer instead of the pointee type. 3419 if (Pair.second.isLocalPrivate()) { 3420 if (VD->getType()->isLValueReferenceType()) 3421 Type = C.getPointerType(Type); 3422 if (isAllocatableDecl(VD)) 3423 Type = C.getPointerType(Type); 3424 } 3425 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3426 if (VD->hasAttrs()) { 3427 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3428 E(VD->getAttrs().end()); 3429 I != E; ++I) 3430 FD->addAttr(*I); 3431 } 3432 } 3433 RD->completeDefinition(); 3434 return RD; 3435 } 3436 return nullptr; 3437 } 3438 3439 static RecordDecl * 3440 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3441 QualType KmpInt32Ty, 3442 QualType KmpRoutineEntryPointerQTy) { 3443 ASTContext &C = CGM.getContext(); 3444 // Build struct kmp_task_t { 3445 // void * shareds; 3446 // kmp_routine_entry_t routine; 3447 // kmp_int32 part_id; 3448 // kmp_cmplrdata_t data1; 3449 // kmp_cmplrdata_t data2; 3450 // For taskloops additional fields: 3451 // kmp_uint64 lb; 3452 // kmp_uint64 ub; 3453 // kmp_int64 st; 3454 // kmp_int32 liter; 3455 // void * reductions; 3456 // }; 3457 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3458 
UD->startDefinition(); 3459 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3460 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3461 UD->completeDefinition(); 3462 QualType KmpCmplrdataTy = C.getRecordType(UD); 3463 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3464 RD->startDefinition(); 3465 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3466 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3467 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3468 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3469 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3470 if (isOpenMPTaskLoopDirective(Kind)) { 3471 QualType KmpUInt64Ty = 3472 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3473 QualType KmpInt64Ty = 3474 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3475 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3476 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3477 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3478 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3479 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3480 } 3481 RD->completeDefinition(); 3482 return RD; 3483 } 3484 3485 static RecordDecl * 3486 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3487 ArrayRef<PrivateDataTy> Privates) { 3488 ASTContext &C = CGM.getContext(); 3489 // Build struct kmp_task_t_with_privates { 3490 // kmp_task_t task_data; 3491 // .kmp_privates_t. privates; 3492 // }; 3493 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3494 RD->startDefinition(); 3495 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3496 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3497 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3498 RD->completeDefinition(); 3499 return RD; 3500 } 3501 3502 /// Emit a proxy function which accepts kmp_task_t as the second 3503 /// argument. 
3504 /// \code 3505 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3506 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3507 /// For taskloops: 3508 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3509 /// tt->reductions, tt->shareds); 3510 /// return 0; 3511 /// } 3512 /// \endcode 3513 static llvm::Function * 3514 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3515 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3516 QualType KmpTaskTWithPrivatesPtrQTy, 3517 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3518 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3519 llvm::Value *TaskPrivatesMap) { 3520 ASTContext &C = CGM.getContext(); 3521 FunctionArgList Args; 3522 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3523 ImplicitParamDecl::Other); 3524 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3525 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3526 ImplicitParamDecl::Other); 3527 Args.push_back(&GtidArg); 3528 Args.push_back(&TaskTypeArg); 3529 const auto &TaskEntryFnInfo = 3530 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3531 llvm::FunctionType *TaskEntryTy = 3532 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3533 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3534 auto *TaskEntry = llvm::Function::Create( 3535 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3536 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3537 TaskEntry->setDoesNotRecurse(); 3538 CodeGenFunction CGF(CGM); 3539 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3540 Loc, Loc); 3541 3542 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3543 // tt, 3544 // For taskloops: 3545 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3546 // 
tt->task_data.shareds); 3547 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3548 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3549 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3550 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3551 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3552 const auto *KmpTaskTWithPrivatesQTyRD = 3553 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3554 LValue Base = 3555 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3556 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3557 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3558 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3559 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3560 3561 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3562 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3563 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3564 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3565 CGF.ConvertTypeForMem(SharedsPtrTy)); 3566 3567 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3568 llvm::Value *PrivatesParam; 3569 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3570 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3571 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3572 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3573 } else { 3574 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3575 } 3576 3577 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3578 TaskPrivatesMap, 3579 CGF.Builder 3580 .CreatePointerBitCastOrAddrSpaceCast( 3581 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3582 .getPointer()}; 3583 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3584 std::end(CommonArgs)); 3585 if (isOpenMPTaskLoopDirective(Kind)) { 3586 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3587 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3588 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3589 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3590 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3591 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3592 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3593 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3594 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3595 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3596 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3597 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3598 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3599 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3600 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3601 CallArgs.push_back(LBParam); 3602 CallArgs.push_back(UBParam); 3603 CallArgs.push_back(StParam); 3604 CallArgs.push_back(LIParam); 3605 CallArgs.push_back(RParam); 3606 } 3607 CallArgs.push_back(SharedsParam); 3608 3609 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3610 CallArgs); 3611 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3612 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3613 CGF.FinishFunction(); 3614 return TaskEntry; 3615 } 3616 3617 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3618 SourceLocation Loc, 3619 QualType KmpInt32Ty, 3620 QualType KmpTaskTWithPrivatesPtrQTy, 3621 QualType KmpTaskTWithPrivatesQTy) { 3622 ASTContext &C = CGM.getContext(); 3623 FunctionArgList Args; 3624 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3625 ImplicitParamDecl::Other); 3626 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3627 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3628 
ImplicitParamDecl::Other); 3629 Args.push_back(&GtidArg); 3630 Args.push_back(&TaskTypeArg); 3631 const auto &DestructorFnInfo = 3632 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3633 llvm::FunctionType *DestructorFnTy = 3634 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3635 std::string Name = 3636 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3637 auto *DestructorFn = 3638 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3639 Name, &CGM.getModule()); 3640 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3641 DestructorFnInfo); 3642 DestructorFn->setDoesNotRecurse(); 3643 CodeGenFunction CGF(CGM); 3644 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3645 Args, Loc, Loc); 3646 3647 LValue Base = CGF.EmitLoadOfPointerLValue( 3648 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3649 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3650 const auto *KmpTaskTWithPrivatesQTyRD = 3651 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3652 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3653 Base = CGF.EmitLValueForField(Base, *FI); 3654 for (const auto *Field : 3655 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3656 if (QualType::DestructionKind DtorKind = 3657 Field->getType().isDestructedType()) { 3658 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3659 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3660 } 3661 } 3662 CGF.FinishFunction(); 3663 return DestructorFn; 3664 } 3665 3666 /// Emit a privates mapping function for correct handling of private and 3667 /// firstprivate variables. 3668 /// \code 3669 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3670 /// **noalias priv1,..., <tyn> **noalias privn) { 3671 /// *priv1 = &.privates.priv1; 3672 /// ...; 3673 /// *privn = &.privates.privn; 3674 /// } 3675 /// \endcode 3676 static llvm::Value * 3677 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3678 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3679 ArrayRef<PrivateDataTy> Privates) { 3680 ASTContext &C = CGM.getContext(); 3681 FunctionArgList Args; 3682 ImplicitParamDecl TaskPrivatesArg( 3683 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3684 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3685 ImplicitParamDecl::Other); 3686 Args.push_back(&TaskPrivatesArg); 3687 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3688 unsigned Counter = 1; 3689 for (const Expr *E : Data.PrivateVars) { 3690 Args.push_back(ImplicitParamDecl::Create( 3691 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3692 C.getPointerType(C.getPointerType(E->getType())) 3693 .withConst() 3694 .withRestrict(), 3695 ImplicitParamDecl::Other)); 3696 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3697 PrivateVarsPos[VD] = Counter; 3698 ++Counter; 3699 } 3700 for (const Expr *E : Data.FirstprivateVars) { 3701 Args.push_back(ImplicitParamDecl::Create( 3702 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3703 C.getPointerType(C.getPointerType(E->getType())) 3704 .withConst() 3705 .withRestrict(), 3706 ImplicitParamDecl::Other)); 3707 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3708 PrivateVarsPos[VD] = Counter; 3709 ++Counter; 3710 } 3711 for (const Expr *E : Data.LastprivateVars) { 3712 Args.push_back(ImplicitParamDecl::Create( 3713 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3714 C.getPointerType(C.getPointerType(E->getType())) 3715 .withConst() 3716 .withRestrict(), 3717 ImplicitParamDecl::Other)); 3718 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3719 PrivateVarsPos[VD] = Counter; 3720 ++Counter; 3721 } 3722 for (const VarDecl *VD : 
Data.PrivateLocals) { 3723 QualType Ty = VD->getType().getNonReferenceType(); 3724 if (VD->getType()->isLValueReferenceType()) 3725 Ty = C.getPointerType(Ty); 3726 if (isAllocatableDecl(VD)) 3727 Ty = C.getPointerType(Ty); 3728 Args.push_back(ImplicitParamDecl::Create( 3729 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3730 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3731 ImplicitParamDecl::Other)); 3732 PrivateVarsPos[VD] = Counter; 3733 ++Counter; 3734 } 3735 const auto &TaskPrivatesMapFnInfo = 3736 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3737 llvm::FunctionType *TaskPrivatesMapTy = 3738 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3739 std::string Name = 3740 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3741 auto *TaskPrivatesMap = llvm::Function::Create( 3742 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3743 &CGM.getModule()); 3744 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3745 TaskPrivatesMapFnInfo); 3746 if (CGM.getLangOpts().Optimize) { 3747 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3748 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3749 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3750 } 3751 CodeGenFunction CGF(CGM); 3752 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3753 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3754 3755 // *privi = &.privates.privi; 3756 LValue Base = CGF.EmitLoadOfPointerLValue( 3757 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3758 TaskPrivatesArg.getType()->castAs<PointerType>()); 3759 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3760 Counter = 0; 3761 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3762 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3763 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3764 LValue RefLVal = 3765 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3766 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3767 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3768 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3769 ++Counter; 3770 } 3771 CGF.FinishFunction(); 3772 return TaskPrivatesMap; 3773 } 3774 3775 /// Emit initialization for private variables in task-based directives. 3776 static void emitPrivatesInit(CodeGenFunction &CGF, 3777 const OMPExecutableDirective &D, 3778 Address KmpTaskSharedsPtr, LValue TDBase, 3779 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3780 QualType SharedsTy, QualType SharedsPtrTy, 3781 const OMPTaskDataTy &Data, 3782 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3783 ASTContext &C = CGF.getContext(); 3784 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3785 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3786 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3787 ? OMPD_taskloop 3788 : OMPD_task; 3789 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3790 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3791 LValue SrcBase; 3792 bool IsTargetTask = 3793 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3794 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3795 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3796 // PointersArray, SizesArray, and MappersArray. The original variables for 3797 // these arrays are not captured and we get their addresses explicitly. 3798 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3799 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3800 SrcBase = CGF.MakeAddrLValue( 3801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3802 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3803 SharedsTy); 3804 } 3805 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3806 for (const PrivateDataTy &Pair : Privates) { 3807 // Do not initialize private locals. 
3808 if (Pair.second.isLocalPrivate()) { 3809 ++FI; 3810 continue; 3811 } 3812 const VarDecl *VD = Pair.second.PrivateCopy; 3813 const Expr *Init = VD->getAnyInitializer(); 3814 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3815 !CGF.isTrivialInitializer(Init)))) { 3816 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3817 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3818 const VarDecl *OriginalVD = Pair.second.Original; 3819 // Check if the variable is the target-based BasePointersArray, 3820 // PointersArray, SizesArray, or MappersArray. 3821 LValue SharedRefLValue; 3822 QualType Type = PrivateLValue.getType(); 3823 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3824 if (IsTargetTask && !SharedField) { 3825 assert(isa<ImplicitParamDecl>(OriginalVD) && 3826 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3827 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3828 ->getNumParams() == 0 && 3829 isa<TranslationUnitDecl>( 3830 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3831 ->getDeclContext()) && 3832 "Expected artificial target data variable."); 3833 SharedRefLValue = 3834 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3835 } else if (ForDup) { 3836 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3837 SharedRefLValue = CGF.MakeAddrLValue( 3838 Address(SharedRefLValue.getPointer(CGF), 3839 C.getDeclAlign(OriginalVD)), 3840 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3841 SharedRefLValue.getTBAAInfo()); 3842 } else if (CGF.LambdaCaptureFields.count( 3843 Pair.second.Original->getCanonicalDecl()) > 0 || 3844 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3845 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3846 } else { 3847 // Processing for implicitly captured variables. 
3848 InlinedOpenMPRegionRAII Region( 3849 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3850 /*HasCancel=*/false); 3851 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3852 } 3853 if (Type->isArrayType()) { 3854 // Initialize firstprivate array. 3855 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3856 // Perform simple memcpy. 3857 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3858 } else { 3859 // Initialize firstprivate array using element-by-element 3860 // initialization. 3861 CGF.EmitOMPAggregateAssign( 3862 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3863 Type, 3864 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3865 Address SrcElement) { 3866 // Clean up any temporaries needed by the initialization. 3867 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3868 InitScope.addPrivate( 3869 Elem, [SrcElement]() -> Address { return SrcElement; }); 3870 (void)InitScope.Privatize(); 3871 // Emit initialization for single element. 3872 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3873 CGF, &CapturesInfo); 3874 CGF.EmitAnyExprToMem(Init, DestElement, 3875 Init->getType().getQualifiers(), 3876 /*IsInitializer=*/false); 3877 }); 3878 } 3879 } else { 3880 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3881 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3882 return SharedRefLValue.getAddress(CGF); 3883 }); 3884 (void)InitScope.Privatize(); 3885 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3886 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3887 /*capturedByInit=*/false); 3888 } 3889 } else { 3890 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3891 } 3892 } 3893 ++FI; 3894 } 3895 } 3896 3897 /// Check if duplication function is required for taskloops. 
3898 static bool checkInitIsRequired(CodeGenFunction &CGF, 3899 ArrayRef<PrivateDataTy> Privates) { 3900 bool InitRequired = false; 3901 for (const PrivateDataTy &Pair : Privates) { 3902 if (Pair.second.isLocalPrivate()) 3903 continue; 3904 const VarDecl *VD = Pair.second.PrivateCopy; 3905 const Expr *Init = VD->getAnyInitializer(); 3906 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3907 !CGF.isTrivialInitializer(Init)); 3908 if (InitRequired) 3909 break; 3910 } 3911 return InitRequired; 3912 } 3913 3914 3915 /// Emit task_dup function (for initialization of 3916 /// private/firstprivate/lastprivate vars and last_iter flag) 3917 /// \code 3918 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3919 /// lastpriv) { 3920 /// // setup lastprivate flag 3921 /// task_dst->last = lastpriv; 3922 /// // could be constructor calls here... 3923 /// } 3924 /// \endcode 3925 static llvm::Value * 3926 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3927 const OMPExecutableDirective &D, 3928 QualType KmpTaskTWithPrivatesPtrQTy, 3929 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3930 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3931 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3932 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3933 ASTContext &C = CGM.getContext(); 3934 FunctionArgList Args; 3935 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3936 KmpTaskTWithPrivatesPtrQTy, 3937 ImplicitParamDecl::Other); 3938 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3939 KmpTaskTWithPrivatesPtrQTy, 3940 ImplicitParamDecl::Other); 3941 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3942 ImplicitParamDecl::Other); 3943 Args.push_back(&DstArg); 3944 Args.push_back(&SrcArg); 3945 Args.push_back(&LastprivArg); 3946 const auto &TaskDupFnInfo = 3947 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3948 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3949 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3950 auto *TaskDup = llvm::Function::Create( 3951 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3952 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3953 TaskDup->setDoesNotRecurse(); 3954 CodeGenFunction CGF(CGM); 3955 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3956 Loc); 3957 3958 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3959 CGF.GetAddrOfLocalVar(&DstArg), 3960 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3961 // task_dst->liter = lastpriv; 3962 if (WithLastIter) { 3963 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3964 LValue Base = CGF.EmitLValueForField( 3965 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3966 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3967 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3968 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3969 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3970 } 3971 3972 // Emit initial values for private copies (if any). 
3973 assert(!Privates.empty()); 3974 Address KmpTaskSharedsPtr = Address::invalid(); 3975 if (!Data.FirstprivateVars.empty()) { 3976 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3977 CGF.GetAddrOfLocalVar(&SrcArg), 3978 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3979 LValue Base = CGF.EmitLValueForField( 3980 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3981 KmpTaskSharedsPtr = Address( 3982 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3983 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3984 KmpTaskTShareds)), 3985 Loc), 3986 CGM.getNaturalTypeAlignment(SharedsTy)); 3987 } 3988 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3989 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3990 CGF.FinishFunction(); 3991 return TaskDup; 3992 } 3993 3994 /// Checks if destructor function is required to be generated. 3995 /// \return true if cleanups are required, false otherwise. 3996 static bool 3997 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3998 ArrayRef<PrivateDataTy> Privates) { 3999 for (const PrivateDataTy &P : Privates) { 4000 if (P.second.isLocalPrivate()) 4001 continue; 4002 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4003 if (Ty.isDestructedType()) 4004 return true; 4005 } 4006 return false; 4007 } 4008 4009 namespace { 4010 /// Loop generator for OpenMP iterator expression. 
4011 class OMPIteratorGeneratorScope final 4012 : public CodeGenFunction::OMPPrivateScope { 4013 CodeGenFunction &CGF; 4014 const OMPIteratorExpr *E = nullptr; 4015 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4016 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4017 OMPIteratorGeneratorScope() = delete; 4018 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4019 4020 public: 4021 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4022 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4023 if (!E) 4024 return; 4025 SmallVector<llvm::Value *, 4> Uppers; 4026 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4027 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4028 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4029 addPrivate(VD, [&CGF, VD]() { 4030 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4031 }); 4032 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4033 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4034 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4035 "counter.addr"); 4036 }); 4037 } 4038 Privatize(); 4039 4040 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4041 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4042 LValue CLVal = 4043 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4044 HelperData.CounterVD->getType()); 4045 // Counter = 0; 4046 CGF.EmitStoreOfScalar( 4047 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4048 CLVal); 4049 CodeGenFunction::JumpDest &ContDest = 4050 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4051 CodeGenFunction::JumpDest &ExitDest = 4052 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4053 // N = <number-of_iterations>; 4054 llvm::Value *N = Uppers[I]; 4055 // cont: 4056 // if (Counter < N) goto body; else goto exit; 4057 CGF.EmitBlock(ContDest.getBlock()); 4058 auto *CVal = 4059 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4060 llvm::Value *Cmp = 4061 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4062 ? CGF.Builder.CreateICmpSLT(CVal, N) 4063 : CGF.Builder.CreateICmpULT(CVal, N); 4064 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4065 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4066 // body: 4067 CGF.EmitBlock(BodyBB); 4068 // Iteri = Begini + Counter * Stepi; 4069 CGF.EmitIgnoredExpr(HelperData.Update); 4070 } 4071 } 4072 ~OMPIteratorGeneratorScope() { 4073 if (!E) 4074 return; 4075 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4076 // Counter = Counter + 1; 4077 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4078 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4079 // goto cont; 4080 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4081 // exit: 4082 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4083 } 4084 } 4085 }; 4086 } // namespace 4087 4088 static std::pair<llvm::Value *, llvm::Value *> 4089 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4090 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4091 llvm::Value *Addr; 4092 if (OASE) { 4093 const Expr *Base = OASE->getBase(); 4094 Addr = CGF.EmitScalarExpr(Base); 4095 } else { 4096 Addr = CGF.EmitLValue(E).getPointer(CGF); 4097 } 4098 llvm::Value *SizeVal; 4099 QualType Ty = E->getType(); 4100 if (OASE) { 4101 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4102 for (const Expr *SE : OASE->getDimensions()) { 4103 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4104 Sz = CGF.EmitScalarConversion( 4105 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4106 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4107 } 4108 } else if (const auto *ASE = 4109 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4110 LValue UpAddrLVal = 4111 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4112 llvm::Value *UpAddr = 
4113 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4114 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4115 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4116 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4117 } else { 4118 SizeVal = CGF.getTypeSize(Ty); 4119 } 4120 return std::make_pair(Addr, SizeVal); 4121 } 4122 4123 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4124 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4125 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4126 if (KmpTaskAffinityInfoTy.isNull()) { 4127 RecordDecl *KmpAffinityInfoRD = 4128 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4129 KmpAffinityInfoRD->startDefinition(); 4130 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4131 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4132 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4133 KmpAffinityInfoRD->completeDefinition(); 4134 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4135 } 4136 } 4137 4138 CGOpenMPRuntime::TaskResultTy 4139 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4140 const OMPExecutableDirective &D, 4141 llvm::Function *TaskFunction, QualType SharedsTy, 4142 Address Shareds, const OMPTaskDataTy &Data) { 4143 ASTContext &C = CGM.getContext(); 4144 llvm::SmallVector<PrivateDataTy, 4> Privates; 4145 // Aggregate privates and sort them by the alignment. 
4146 const auto *I = Data.PrivateCopies.begin(); 4147 for (const Expr *E : Data.PrivateVars) { 4148 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4149 Privates.emplace_back( 4150 C.getDeclAlign(VD), 4151 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4152 /*PrivateElemInit=*/nullptr)); 4153 ++I; 4154 } 4155 I = Data.FirstprivateCopies.begin(); 4156 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4157 for (const Expr *E : Data.FirstprivateVars) { 4158 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4159 Privates.emplace_back( 4160 C.getDeclAlign(VD), 4161 PrivateHelpersTy( 4162 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4163 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4164 ++I; 4165 ++IElemInitRef; 4166 } 4167 I = Data.LastprivateCopies.begin(); 4168 for (const Expr *E : Data.LastprivateVars) { 4169 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4170 Privates.emplace_back( 4171 C.getDeclAlign(VD), 4172 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4173 /*PrivateElemInit=*/nullptr)); 4174 ++I; 4175 } 4176 for (const VarDecl *VD : Data.PrivateLocals) { 4177 if (isAllocatableDecl(VD)) 4178 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4179 else 4180 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4181 } 4182 llvm::stable_sort(Privates, 4183 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4184 return L.first > R.first; 4185 }); 4186 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4187 // Build type kmp_routine_entry_t (if not built yet). 4188 emitKmpRoutineEntryT(KmpInt32Ty); 4189 // Build type kmp_task_t (if not built yet). 
4190 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4191 if (SavedKmpTaskloopTQTy.isNull()) { 4192 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4193 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4194 } 4195 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4196 } else { 4197 assert((D.getDirectiveKind() == OMPD_task || 4198 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4199 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4200 "Expected taskloop, task or target directive"); 4201 if (SavedKmpTaskTQTy.isNull()) { 4202 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4203 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4204 } 4205 KmpTaskTQTy = SavedKmpTaskTQTy; 4206 } 4207 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4208 // Build particular struct kmp_task_t for the given task. 4209 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4210 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4211 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4212 QualType KmpTaskTWithPrivatesPtrQTy = 4213 C.getPointerType(KmpTaskTWithPrivatesQTy); 4214 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4215 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4216 KmpTaskTWithPrivatesTy->getPointerTo(); 4217 llvm::Value *KmpTaskTWithPrivatesTySize = 4218 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4219 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4220 4221 // Emit initial values for private copies (if any). 
4222 llvm::Value *TaskPrivatesMap = nullptr; 4223 llvm::Type *TaskPrivatesMapTy = 4224 std::next(TaskFunction->arg_begin(), 3)->getType(); 4225 if (!Privates.empty()) { 4226 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4227 TaskPrivatesMap = 4228 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4229 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4230 TaskPrivatesMap, TaskPrivatesMapTy); 4231 } else { 4232 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4233 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4234 } 4235 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4236 // kmp_task_t *tt); 4237 llvm::Function *TaskEntry = emitProxyTaskFunction( 4238 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4239 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4240 TaskPrivatesMap); 4241 4242 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4243 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4244 // kmp_routine_entry_t *task_entry); 4245 // Task flags. Format is taken from 4246 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4247 // description of kmp_tasking_flags struct. 4248 enum { 4249 TiedFlag = 0x1, 4250 FinalFlag = 0x2, 4251 DestructorsFlag = 0x8, 4252 PriorityFlag = 0x20, 4253 DetachableFlag = 0x40, 4254 }; 4255 unsigned Flags = Data.Tied ? TiedFlag : 0; 4256 bool NeedsCleanup = false; 4257 if (!Privates.empty()) { 4258 NeedsCleanup = 4259 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4260 if (NeedsCleanup) 4261 Flags = Flags | DestructorsFlag; 4262 } 4263 if (Data.Priority.getInt()) 4264 Flags = Flags | PriorityFlag; 4265 if (D.hasClausesOfKind<OMPDetachClause>()) 4266 Flags = Flags | DetachableFlag; 4267 llvm::Value *TaskFlags = 4268 Data.Final.getPointer() 4269 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4270 CGF.Builder.getInt32(FinalFlag), 4271 CGF.Builder.getInt32(/*C=*/0)) 4272 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4273 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4274 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4275 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4276 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4277 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4278 TaskEntry, KmpRoutineEntryPtrTy)}; 4279 llvm::Value *NewTask; 4280 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4281 // Check if we have any device clause associated with the directive. 4282 const Expr *Device = nullptr; 4283 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4284 Device = C->getDevice(); 4285 // Emit device ID if any otherwise use default value. 4286 llvm::Value *DeviceID; 4287 if (Device) 4288 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4289 CGF.Int64Ty, /*isSigned=*/true); 4290 else 4291 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4292 AllocArgs.push_back(DeviceID); 4293 NewTask = CGF.EmitRuntimeCall( 4294 OMPBuilder.getOrCreateRuntimeFunction( 4295 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4296 AllocArgs); 4297 } else { 4298 NewTask = 4299 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4300 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4301 AllocArgs); 4302 } 4303 // Emit detach clause initialization. 
4304 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4305 // task_descriptor); 4306 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4307 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4308 LValue EvtLVal = CGF.EmitLValue(Evt); 4309 4310 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4311 // int gtid, kmp_task_t *task); 4312 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4313 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4314 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4315 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4316 OMPBuilder.getOrCreateRuntimeFunction( 4317 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4318 {Loc, Tid, NewTask}); 4319 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4320 Evt->getExprLoc()); 4321 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4322 } 4323 // Process affinity clauses. 4324 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4325 // Process list of affinity data. 4326 ASTContext &C = CGM.getContext(); 4327 Address AffinitiesArray = Address::invalid(); 4328 // Calculate number of elements to form the array of affinity data. 4329 llvm::Value *NumOfElements = nullptr; 4330 unsigned NumAffinities = 0; 4331 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4332 if (const Expr *Modifier = C->getModifier()) { 4333 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4334 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4335 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4336 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4337 NumOfElements = 4338 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4339 } 4340 } else { 4341 NumAffinities += C->varlist_size(); 4342 } 4343 } 4344 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4345 // Fields ids in kmp_task_affinity_info record. 
4346 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4347 4348 QualType KmpTaskAffinityInfoArrayTy; 4349 if (NumOfElements) { 4350 NumOfElements = CGF.Builder.CreateNUWAdd( 4351 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4352 OpaqueValueExpr OVE( 4353 Loc, 4354 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4355 VK_RValue); 4356 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4357 RValue::get(NumOfElements)); 4358 KmpTaskAffinityInfoArrayTy = 4359 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4360 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4361 // Properly emit variable-sized array. 4362 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4363 ImplicitParamDecl::Other); 4364 CGF.EmitVarDecl(*PD); 4365 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4366 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4367 /*isSigned=*/false); 4368 } else { 4369 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4370 KmpTaskAffinityInfoTy, 4371 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4372 ArrayType::Normal, /*IndexTypeQuals=*/0); 4373 AffinitiesArray = 4374 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4375 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4376 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4377 /*isSigned=*/false); 4378 } 4379 4380 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4381 // Fill array by elements without iterators. 
4382 unsigned Pos = 0; 4383 bool HasIterator = false; 4384 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4385 if (C->getModifier()) { 4386 HasIterator = true; 4387 continue; 4388 } 4389 for (const Expr *E : C->varlists()) { 4390 llvm::Value *Addr; 4391 llvm::Value *Size; 4392 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4393 LValue Base = 4394 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4395 KmpTaskAffinityInfoTy); 4396 // affs[i].base_addr = &<Affinities[i].second>; 4397 LValue BaseAddrLVal = CGF.EmitLValueForField( 4398 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4399 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4400 BaseAddrLVal); 4401 // affs[i].len = sizeof(<Affinities[i].second>); 4402 LValue LenLVal = CGF.EmitLValueForField( 4403 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4404 CGF.EmitStoreOfScalar(Size, LenLVal); 4405 ++Pos; 4406 } 4407 } 4408 LValue PosLVal; 4409 if (HasIterator) { 4410 PosLVal = CGF.MakeAddrLValue( 4411 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4412 C.getSizeType()); 4413 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4414 } 4415 // Process elements with iterators. 
4416 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4417 const Expr *Modifier = C->getModifier(); 4418 if (!Modifier) 4419 continue; 4420 OMPIteratorGeneratorScope IteratorScope( 4421 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4422 for (const Expr *E : C->varlists()) { 4423 llvm::Value *Addr; 4424 llvm::Value *Size; 4425 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4426 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4427 LValue Base = CGF.MakeAddrLValue( 4428 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4429 AffinitiesArray.getAlignment()), 4430 KmpTaskAffinityInfoTy); 4431 // affs[i].base_addr = &<Affinities[i].second>; 4432 LValue BaseAddrLVal = CGF.EmitLValueForField( 4433 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4434 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4435 BaseAddrLVal); 4436 // affs[i].len = sizeof(<Affinities[i].second>); 4437 LValue LenLVal = CGF.EmitLValueForField( 4438 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4439 CGF.EmitStoreOfScalar(Size, LenLVal); 4440 Idx = CGF.Builder.CreateNUWAdd( 4441 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4442 CGF.EmitStoreOfScalar(Idx, PosLVal); 4443 } 4444 } 4445 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4446 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4447 // naffins, kmp_task_affinity_info_t *affin_list); 4448 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4449 llvm::Value *GTid = getThreadID(CGF, Loc); 4450 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4451 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4452 // FIXME: Emit the function and ignore its result for now unless the 4453 // runtime function is properly implemented. 
4454 (void)CGF.EmitRuntimeCall( 4455 OMPBuilder.getOrCreateRuntimeFunction( 4456 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4457 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4458 } 4459 llvm::Value *NewTaskNewTaskTTy = 4460 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4461 NewTask, KmpTaskTWithPrivatesPtrTy); 4462 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4463 KmpTaskTWithPrivatesQTy); 4464 LValue TDBase = 4465 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4466 // Fill the data in the resulting kmp_task_t record. 4467 // Copy shareds if there are any. 4468 Address KmpTaskSharedsPtr = Address::invalid(); 4469 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4470 KmpTaskSharedsPtr = 4471 Address(CGF.EmitLoadOfScalar( 4472 CGF.EmitLValueForField( 4473 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4474 KmpTaskTShareds)), 4475 Loc), 4476 CGM.getNaturalTypeAlignment(SharedsTy)); 4477 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4478 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4479 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4480 } 4481 // Emit initial values for private copies (if any). 4482 TaskResultTy Result; 4483 if (!Privates.empty()) { 4484 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4485 SharedsTy, SharedsPtrTy, Data, Privates, 4486 /*ForDup=*/false); 4487 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4488 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4489 Result.TaskDupFn = emitTaskDupFunction( 4490 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4491 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4492 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4493 } 4494 } 4495 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4496 enum { Priority = 0, Destructors = 1 }; 4497 // Provide pointer to function with destructors for privates. 4498 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4499 const RecordDecl *KmpCmplrdataUD = 4500 (*FI)->getType()->getAsUnionType()->getDecl(); 4501 if (NeedsCleanup) { 4502 llvm::Value *DestructorFn = emitDestructorsFunction( 4503 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4504 KmpTaskTWithPrivatesQTy); 4505 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4506 LValue DestructorsLV = CGF.EmitLValueForField( 4507 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4508 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4509 DestructorFn, KmpRoutineEntryPtrTy), 4510 DestructorsLV); 4511 } 4512 // Set priority. 4513 if (Data.Priority.getInt()) { 4514 LValue Data2LV = CGF.EmitLValueForField( 4515 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4516 LValue PriorityLV = CGF.EmitLValueForField( 4517 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4518 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4519 } 4520 Result.NewTask = NewTask; 4521 Result.TaskEntry = TaskEntry; 4522 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4523 Result.TDBase = TDBase; 4524 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4525 return Result; 4526 } 4527 4528 namespace { 4529 /// Dependence kind for RTL. 4530 enum RTLDependenceKindTy { 4531 DepIn = 0x01, 4532 DepInOut = 0x3, 4533 DepMutexInOutSet = 0x4 4534 }; 4535 /// Fields ids in kmp_depend_info record. 4536 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4537 } // namespace 4538 4539 /// Translates internal dependency kind into the runtime kind. 4540 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4541 RTLDependenceKindTy DepKind; 4542 switch (K) { 4543 case OMPC_DEPEND_in: 4544 DepKind = DepIn; 4545 break; 4546 // Out and InOut dependencies must use the same code. 
4547 case OMPC_DEPEND_out: 4548 case OMPC_DEPEND_inout: 4549 DepKind = DepInOut; 4550 break; 4551 case OMPC_DEPEND_mutexinoutset: 4552 DepKind = DepMutexInOutSet; 4553 break; 4554 case OMPC_DEPEND_source: 4555 case OMPC_DEPEND_sink: 4556 case OMPC_DEPEND_depobj: 4557 case OMPC_DEPEND_unknown: 4558 llvm_unreachable("Unknown task dependence type"); 4559 } 4560 return DepKind; 4561 } 4562 4563 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4564 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4565 QualType &FlagsTy) { 4566 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4567 if (KmpDependInfoTy.isNull()) { 4568 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4569 KmpDependInfoRD->startDefinition(); 4570 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4571 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4572 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4573 KmpDependInfoRD->completeDefinition(); 4574 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4575 } 4576 } 4577 4578 std::pair<llvm::Value *, LValue> 4579 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4580 SourceLocation Loc) { 4581 ASTContext &C = CGM.getContext(); 4582 QualType FlagsTy; 4583 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4584 RecordDecl *KmpDependInfoRD = 4585 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4586 LValue Base = CGF.EmitLoadOfPointerLValue( 4587 DepobjLVal.getAddress(CGF), 4588 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4589 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4590 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4591 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4592 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4593 Base.getTBAAInfo()); 4594 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4595 Addr.getPointer(), 4596 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4597 LValue NumDepsBase = CGF.MakeAddrLValue( 4598 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4599 Base.getBaseInfo(), Base.getTBAAInfo()); 4600 // NumDeps = deps[i].base_addr; 4601 LValue BaseAddrLVal = CGF.EmitLValueForField( 4602 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4603 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4604 return std::make_pair(NumDeps, Base); 4605 } 4606 4607 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4608 llvm::PointerUnion<unsigned *, LValue *> Pos, 4609 const OMPTaskDataTy::DependData &Data, 4610 Address DependenciesArray) { 4611 CodeGenModule &CGM = CGF.CGM; 4612 ASTContext &C = CGM.getContext(); 4613 QualType FlagsTy; 4614 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4615 RecordDecl *KmpDependInfoRD = 4616 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4617 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4618 4619 OMPIteratorGeneratorScope IteratorScope( 4620 CGF, cast_or_null<OMPIteratorExpr>( 4621 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4622 : nullptr)); 4623 for (const Expr *E : Data.DepExprs) { 4624 llvm::Value *Addr; 4625 llvm::Value *Size; 4626 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4627 LValue Base; 4628 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4629 Base = CGF.MakeAddrLValue( 4630 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4631 } else { 4632 LValue &PosLVal = *Pos.get<LValue *>(); 4633 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4634 Base = CGF.MakeAddrLValue( 4635 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4636 DependenciesArray.getAlignment()), 4637 KmpDependInfoTy); 4638 } 4639 // deps[i].base_addr = &<Dependencies[i].second>; 4640 LValue BaseAddrLVal = CGF.EmitLValueForField( 4641 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4642 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4643 BaseAddrLVal); 4644 // deps[i].len = sizeof(<Dependencies[i].second>); 4645 LValue LenLVal = CGF.EmitLValueForField( 4646 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4647 CGF.EmitStoreOfScalar(Size, LenLVal); 4648 // deps[i].flags = <Dependencies[i].first>; 4649 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4650 LValue FlagsLVal = CGF.EmitLValueForField( 4651 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4652 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4653 FlagsLVal); 4654 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4655 ++(*P); 4656 } else { 4657 LValue &PosLVal = *Pos.get<LValue *>(); 4658 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4659 Idx = CGF.Builder.CreateNUWAdd(Idx, 4660 llvm::ConstantInt::get(Idx->getType(), 1)); 4661 CGF.EmitStoreOfScalar(Idx, PosLVal); 4662 } 4663 } 4664 } 4665 4666 static SmallVector<llvm::Value *, 4> 4667 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4668 const OMPTaskDataTy::DependData 
&Data) { 4669 assert(Data.DepKind == OMPC_DEPEND_depobj && 4670 "Expected depobj dependecy kind."); 4671 SmallVector<llvm::Value *, 4> Sizes; 4672 SmallVector<LValue, 4> SizeLVals; 4673 ASTContext &C = CGF.getContext(); 4674 QualType FlagsTy; 4675 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4676 RecordDecl *KmpDependInfoRD = 4677 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4678 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4679 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4680 { 4681 OMPIteratorGeneratorScope IteratorScope( 4682 CGF, cast_or_null<OMPIteratorExpr>( 4683 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4684 : nullptr)); 4685 for (const Expr *E : Data.DepExprs) { 4686 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4687 LValue Base = CGF.EmitLoadOfPointerLValue( 4688 DepobjLVal.getAddress(CGF), 4689 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4690 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4691 Base.getAddress(CGF), KmpDependInfoPtrT); 4692 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4693 Base.getTBAAInfo()); 4694 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4695 Addr.getPointer(), 4696 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4697 LValue NumDepsBase = CGF.MakeAddrLValue( 4698 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4699 Base.getBaseInfo(), Base.getTBAAInfo()); 4700 // NumDeps = deps[i].base_addr; 4701 LValue BaseAddrLVal = CGF.EmitLValueForField( 4702 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4703 llvm::Value *NumDeps = 4704 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4705 LValue NumLVal = CGF.MakeAddrLValue( 4706 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4707 C.getUIntPtrType()); 4708 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4709 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4710 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4711 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4712 CGF.EmitStoreOfScalar(Add, NumLVal); 4713 SizeLVals.push_back(NumLVal); 4714 } 4715 } 4716 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4717 llvm::Value *Size = 4718 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4719 Sizes.push_back(Size); 4720 } 4721 return Sizes; 4722 } 4723 4724 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4725 LValue PosLVal, 4726 const OMPTaskDataTy::DependData &Data, 4727 Address DependenciesArray) { 4728 assert(Data.DepKind == OMPC_DEPEND_depobj && 4729 "Expected depobj dependecy kind."); 4730 ASTContext &C = CGF.getContext(); 4731 QualType FlagsTy; 4732 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4733 RecordDecl *KmpDependInfoRD = 4734 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4735 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4736 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4737 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4738 { 4739 OMPIteratorGeneratorScope IteratorScope( 4740 CGF, cast_or_null<OMPIteratorExpr>( 4741 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4742 : nullptr)); 4743 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4744 const Expr *E = Data.DepExprs[I]; 4745 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4746 LValue Base = CGF.EmitLoadOfPointerLValue( 4747 DepobjLVal.getAddress(CGF), 4748 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4749 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4750 Base.getAddress(CGF), KmpDependInfoPtrT); 4751 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4752 Base.getTBAAInfo()); 4753 4754 // Get number of elements in a single depobj. 
4755 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4756 Addr.getPointer(), 4757 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4758 LValue NumDepsBase = CGF.MakeAddrLValue( 4759 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4760 Base.getBaseInfo(), Base.getTBAAInfo()); 4761 // NumDeps = deps[i].base_addr; 4762 LValue BaseAddrLVal = CGF.EmitLValueForField( 4763 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4764 llvm::Value *NumDeps = 4765 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4766 4767 // memcopy dependency data. 4768 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4769 ElSize, 4770 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4771 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4772 Address DepAddr = 4773 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4774 DependenciesArray.getAlignment()); 4775 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4776 4777 // Increase pos. 4778 // pos += size; 4779 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4780 CGF.EmitStoreOfScalar(Add, PosLVal); 4781 } 4782 } 4783 } 4784 4785 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4786 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4787 SourceLocation Loc) { 4788 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4789 return D.DepExprs.empty(); 4790 })) 4791 return std::make_pair(nullptr, Address::invalid()); 4792 // Process list of dependencies. 4793 ASTContext &C = CGM.getContext(); 4794 Address DependenciesArray = Address::invalid(); 4795 llvm::Value *NumOfElements = nullptr; 4796 unsigned NumDependencies = std::accumulate( 4797 Dependencies.begin(), Dependencies.end(), 0, 4798 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4799 return D.DepKind == OMPC_DEPEND_depobj 4800 ? V 4801 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4802 }); 4803 QualType FlagsTy; 4804 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4805 bool HasDepobjDeps = false; 4806 bool HasRegularWithIterators = false; 4807 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4808 llvm::Value *NumOfRegularWithIterators = 4809 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4810 // Calculate number of depobj dependecies and regular deps with the iterators. 4811 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4812 if (D.DepKind == OMPC_DEPEND_depobj) { 4813 SmallVector<llvm::Value *, 4> Sizes = 4814 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4815 for (llvm::Value *Size : Sizes) { 4816 NumOfDepobjElements = 4817 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4818 } 4819 HasDepobjDeps = true; 4820 continue; 4821 } 4822 // Include number of iterations, if any. 4823 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4824 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4825 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4826 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4827 NumOfRegularWithIterators = 4828 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4829 } 4830 HasRegularWithIterators = true; 4831 continue; 4832 } 4833 } 4834 4835 QualType KmpDependInfoArrayTy; 4836 if (HasDepobjDeps || HasRegularWithIterators) { 4837 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4838 /*isSigned=*/false); 4839 if (HasDepobjDeps) { 4840 NumOfElements = 4841 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4842 } 4843 if (HasRegularWithIterators) { 4844 NumOfElements = 4845 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4846 } 4847 OpaqueValueExpr OVE(Loc, 4848 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4849 VK_RValue); 4850 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4851 RValue::get(NumOfElements)); 4852 
KmpDependInfoArrayTy = 4853 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4854 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4855 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4856 // Properly emit variable-sized array. 4857 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4858 ImplicitParamDecl::Other); 4859 CGF.EmitVarDecl(*PD); 4860 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4861 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4862 /*isSigned=*/false); 4863 } else { 4864 KmpDependInfoArrayTy = C.getConstantArrayType( 4865 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4866 ArrayType::Normal, /*IndexTypeQuals=*/0); 4867 DependenciesArray = 4868 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4869 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4870 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4871 /*isSigned=*/false); 4872 } 4873 unsigned Pos = 0; 4874 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4875 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4876 Dependencies[I].IteratorExpr) 4877 continue; 4878 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4879 DependenciesArray); 4880 } 4881 // Copy regular dependecies with iterators. 4882 LValue PosLVal = CGF.MakeAddrLValue( 4883 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4884 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4885 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4886 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4887 !Dependencies[I].IteratorExpr) 4888 continue; 4889 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4890 DependenciesArray); 4891 } 4892 // Copy final depobj arrays without iterators. 
4893 if (HasDepobjDeps) { 4894 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4895 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4896 continue; 4897 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4898 DependenciesArray); 4899 } 4900 } 4901 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4902 DependenciesArray, CGF.VoidPtrTy); 4903 return std::make_pair(NumOfElements, DependenciesArray); 4904 } 4905 4906 Address CGOpenMPRuntime::emitDepobjDependClause( 4907 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4908 SourceLocation Loc) { 4909 if (Dependencies.DepExprs.empty()) 4910 return Address::invalid(); 4911 // Process list of dependencies. 4912 ASTContext &C = CGM.getContext(); 4913 Address DependenciesArray = Address::invalid(); 4914 unsigned NumDependencies = Dependencies.DepExprs.size(); 4915 QualType FlagsTy; 4916 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4917 RecordDecl *KmpDependInfoRD = 4918 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4919 4920 llvm::Value *Size; 4921 // Define type kmp_depend_info[<Dependencies.size()>]; 4922 // For depobj reserve one extra element to store the number of elements. 4923 // It is required to handle depobj(x) update(in) construct. 
4924 // kmp_depend_info[<Dependencies.size()>] deps; 4925 llvm::Value *NumDepsVal; 4926 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4927 if (const auto *IE = 4928 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4929 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4930 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4931 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4932 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4933 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4934 } 4935 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4936 NumDepsVal); 4937 CharUnits SizeInBytes = 4938 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4939 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4940 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4941 NumDepsVal = 4942 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4943 } else { 4944 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4945 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4946 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4947 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4948 Size = CGM.getSize(Sz.alignTo(Align)); 4949 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4950 } 4951 // Need to allocate on the dynamic memory. 4952 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4953 // Use default allocator. 
4954 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4955 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4956 4957 llvm::Value *Addr = 4958 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4959 CGM.getModule(), OMPRTL___kmpc_alloc), 4960 Args, ".dep.arr.addr"); 4961 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4962 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4963 DependenciesArray = Address(Addr, Align); 4964 // Write number of elements in the first element of array for depobj. 4965 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4966 // deps[i].base_addr = NumDependencies; 4967 LValue BaseAddrLVal = CGF.EmitLValueForField( 4968 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4969 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4970 llvm::PointerUnion<unsigned *, LValue *> Pos; 4971 unsigned Idx = 1; 4972 LValue PosLVal; 4973 if (Dependencies.IteratorExpr) { 4974 PosLVal = CGF.MakeAddrLValue( 4975 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4976 C.getSizeType()); 4977 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4978 /*IsInit=*/true); 4979 Pos = &PosLVal; 4980 } else { 4981 Pos = &Idx; 4982 } 4983 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4984 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4985 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4986 return DependenciesArray; 4987 } 4988 4989 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4990 SourceLocation Loc) { 4991 ASTContext &C = CGM.getContext(); 4992 QualType FlagsTy; 4993 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4994 LValue Base = CGF.EmitLoadOfPointerLValue( 4995 DepobjLVal.getAddress(CGF), 4996 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4997 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4998 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4999 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5000 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5001 Addr.getPointer(), 5002 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5003 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5004 CGF.VoidPtrTy); 5005 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5006 // Use default allocator. 5007 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5008 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5009 5010 // _kmpc_free(gtid, addr, nullptr); 5011 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5012 CGM.getModule(), OMPRTL___kmpc_free), 5013 Args); 5014 } 5015 5016 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5017 OpenMPDependClauseKind NewDepKind, 5018 SourceLocation Loc) { 5019 ASTContext &C = CGM.getContext(); 5020 QualType FlagsTy; 5021 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5022 RecordDecl *KmpDependInfoRD = 5023 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5024 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5025 llvm::Value *NumDeps; 5026 LValue Base; 5027 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5028 5029 Address Begin = Base.getAddress(CGF); 5030 // Cast from pointer to array type to pointer to single element. 5031 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5032 // The basic structure here is a while-do loop. 
5033 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5034 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5035 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5036 CGF.EmitBlock(BodyBB); 5037 llvm::PHINode *ElementPHI = 5038 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5039 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5040 Begin = Address(ElementPHI, Begin.getAlignment()); 5041 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5042 Base.getTBAAInfo()); 5043 // deps[i].flags = NewDepKind; 5044 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5045 LValue FlagsLVal = CGF.EmitLValueForField( 5046 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5047 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5048 FlagsLVal); 5049 5050 // Shift the address forward by one element. 5051 Address ElementNext = 5052 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5053 ElementPHI->addIncoming(ElementNext.getPointer(), 5054 CGF.Builder.GetInsertBlock()); 5055 llvm::Value *IsEmpty = 5056 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5057 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5058 // Done. 
5059 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5060 } 5061 5062 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5063 const OMPExecutableDirective &D, 5064 llvm::Function *TaskFunction, 5065 QualType SharedsTy, Address Shareds, 5066 const Expr *IfCond, 5067 const OMPTaskDataTy &Data) { 5068 if (!CGF.HaveInsertPoint()) 5069 return; 5070 5071 TaskResultTy Result = 5072 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5073 llvm::Value *NewTask = Result.NewTask; 5074 llvm::Function *TaskEntry = Result.TaskEntry; 5075 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5076 LValue TDBase = Result.TDBase; 5077 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5078 // Process list of dependences. 5079 Address DependenciesArray = Address::invalid(); 5080 llvm::Value *NumOfElements; 5081 std::tie(NumOfElements, DependenciesArray) = 5082 emitDependClause(CGF, Data.Dependences, Loc); 5083 5084 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5085 // libcall. 
5086 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5087 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5088 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5089 // list is not empty 5090 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5091 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5092 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5093 llvm::Value *DepTaskArgs[7]; 5094 if (!Data.Dependences.empty()) { 5095 DepTaskArgs[0] = UpLoc; 5096 DepTaskArgs[1] = ThreadID; 5097 DepTaskArgs[2] = NewTask; 5098 DepTaskArgs[3] = NumOfElements; 5099 DepTaskArgs[4] = DependenciesArray.getPointer(); 5100 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5101 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5102 } 5103 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5104 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5105 if (!Data.Tied) { 5106 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5107 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5108 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5109 } 5110 if (!Data.Dependences.empty()) { 5111 CGF.EmitRuntimeCall( 5112 OMPBuilder.getOrCreateRuntimeFunction( 5113 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5114 DepTaskArgs); 5115 } else { 5116 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5117 CGM.getModule(), OMPRTL___kmpc_omp_task), 5118 TaskArgs); 5119 } 5120 // Check if parent region is untied and build return for untied task; 5121 if (auto *Region = 5122 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5123 Region->emitUntiedSwitch(CGF); 5124 }; 5125 5126 llvm::Value *DepWaitTaskArgs[6]; 5127 if (!Data.Dependences.empty()) { 5128 DepWaitTaskArgs[0] = UpLoc; 5129 DepWaitTaskArgs[1] = ThreadID; 5130 DepWaitTaskArgs[2] = NumOfElements; 5131 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5132 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5133 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5134 } 5135 auto &M = CGM.getModule(); 5136 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5137 TaskEntry, &Data, &DepWaitTaskArgs, 5138 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5139 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5140 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5141 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5142 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5143 // is specified. 5144 if (!Data.Dependences.empty()) 5145 CGF.EmitRuntimeCall( 5146 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5147 DepWaitTaskArgs); 5148 // Call proxy_task_entry(gtid, new_task); 5149 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5150 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5151 Action.Enter(CGF); 5152 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5153 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5154 OutlinedFnArgs); 5155 }; 5156 5157 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5158 // kmp_task_t *new_task); 5159 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5160 // kmp_task_t *new_task); 5161 RegionCodeGenTy RCG(CodeGen); 5162 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5163 M, OMPRTL___kmpc_omp_task_begin_if0), 5164 TaskArgs, 5165 OMPBuilder.getOrCreateRuntimeFunction( 5166 M, OMPRTL___kmpc_omp_task_complete_if0), 5167 TaskArgs); 5168 RCG.setAction(Action); 5169 RCG(CGF); 5170 }; 5171 5172 if (IfCond) { 5173 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5174 } else { 5175 RegionCodeGenTy ThenRCG(ThenCodeGen); 5176 ThenRCG(CGF); 5177 } 5178 } 5179 5180 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5181 const OMPLoopDirective &D, 5182 llvm::Function *TaskFunction, 5183 
QualType SharedsTy, Address Shareds, 5184 const Expr *IfCond, 5185 const OMPTaskDataTy &Data) { 5186 if (!CGF.HaveInsertPoint()) 5187 return; 5188 TaskResultTy Result = 5189 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5190 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5191 // libcall. 5192 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5193 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5194 // sched, kmp_uint64 grainsize, void *task_dup); 5195 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5196 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5197 llvm::Value *IfVal; 5198 if (IfCond) { 5199 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5200 /*isSigned=*/true); 5201 } else { 5202 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5203 } 5204 5205 LValue LBLVal = CGF.EmitLValueForField( 5206 Result.TDBase, 5207 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5208 const auto *LBVar = 5209 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5210 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5211 LBLVal.getQuals(), 5212 /*IsInitializer=*/true); 5213 LValue UBLVal = CGF.EmitLValueForField( 5214 Result.TDBase, 5215 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5216 const auto *UBVar = 5217 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5218 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5219 UBLVal.getQuals(), 5220 /*IsInitializer=*/true); 5221 LValue StLVal = CGF.EmitLValueForField( 5222 Result.TDBase, 5223 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5224 const auto *StVar = 5225 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5226 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5227 StLVal.getQuals(), 5228 /*IsInitializer=*/true); 5229 // 
Store reductions address. 5230 LValue RedLVal = CGF.EmitLValueForField( 5231 Result.TDBase, 5232 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5233 if (Data.Reductions) { 5234 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5235 } else { 5236 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5237 CGF.getContext().VoidPtrTy); 5238 } 5239 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5240 llvm::Value *TaskArgs[] = { 5241 UpLoc, 5242 ThreadID, 5243 Result.NewTask, 5244 IfVal, 5245 LBLVal.getPointer(CGF), 5246 UBLVal.getPointer(CGF), 5247 CGF.EmitLoadOfScalar(StLVal, Loc), 5248 llvm::ConstantInt::getSigned( 5249 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5250 llvm::ConstantInt::getSigned( 5251 CGF.IntTy, Data.Schedule.getPointer() 5252 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5253 : NoSchedule), 5254 Data.Schedule.getPointer() 5255 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5256 /*isSigned=*/false) 5257 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5258 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5259 Result.TaskDupFn, CGF.VoidPtrTy) 5260 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5261 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5262 CGM.getModule(), OMPRTL___kmpc_taskloop), 5263 TaskArgs); 5264 } 5265 5266 /// Emit reduction operation for each element of array (required for 5267 /// array sections) LHS op = RHS. 5268 /// \param Type Type of array. 5269 /// \param LHSVar Variable on the left side of the reduction operation 5270 /// (references element of array in original variable). 5271 /// \param RHSVar Variable on the right side of the reduction operation 5272 /// (references element of array in original variable). 5273 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5274 /// RHSVar. 
5275 static void EmitOMPAggregateReduction( 5276 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5277 const VarDecl *RHSVar, 5278 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5279 const Expr *, const Expr *)> &RedOpGen, 5280 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5281 const Expr *UpExpr = nullptr) { 5282 // Perform element-by-element initialization. 5283 QualType ElementTy; 5284 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5285 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5286 5287 // Drill down to the base element type on both arrays. 5288 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5289 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5290 5291 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5292 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5293 // Cast from pointer to array type to pointer to single element. 5294 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5295 // The basic structure here is a while-do loop. 5296 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5297 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5298 llvm::Value *IsEmpty = 5299 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5300 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5301 5302 // Enter the loop body, making that address the current address. 
5303 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5304 CGF.EmitBlock(BodyBB); 5305 5306 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5307 5308 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5309 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5310 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5311 Address RHSElementCurrent = 5312 Address(RHSElementPHI, 5313 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5314 5315 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5316 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5317 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5318 Address LHSElementCurrent = 5319 Address(LHSElementPHI, 5320 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5321 5322 // Emit copy. 5323 CodeGenFunction::OMPPrivateScope Scope(CGF); 5324 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5325 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5326 Scope.Privatize(); 5327 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5328 Scope.ForceCleanup(); 5329 5330 // Shift the address forward by one element. 5331 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5332 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5333 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5334 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5335 // Check whether we've reached the end. 5336 llvm::Value *Done = 5337 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5338 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5339 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5340 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5341 5342 // Done. 5343 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5344 } 5345 5346 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5347 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5348 /// UDR combiner function. 5349 static void emitReductionCombiner(CodeGenFunction &CGF, 5350 const Expr *ReductionOp) { 5351 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5352 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5353 if (const auto *DRE = 5354 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5355 if (const auto *DRD = 5356 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5357 std::pair<llvm::Function *, llvm::Function *> Reduction = 5358 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5359 RValue Func = RValue::get(Reduction.first); 5360 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5361 CGF.EmitIgnoredExpr(ReductionOp); 5362 return; 5363 } 5364 CGF.EmitIgnoredExpr(ReductionOp); 5365 } 5366 5367 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5368 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5369 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5370 ArrayRef<const Expr *> ReductionOps) { 5371 ASTContext &C = CGM.getContext(); 5372 5373 // void reduction_func(void *LHSArg, void *RHSArg); 5374 FunctionArgList Args; 5375 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5376 ImplicitParamDecl::Other); 5377 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5378 ImplicitParamDecl::Other); 5379 Args.push_back(&LHSArg); 5380 Args.push_back(&RHSArg); 5381 const auto &CGFI = 5382 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5383 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5384 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5385 llvm::GlobalValue::InternalLinkage, Name, 5386 &CGM.getModule()); 5387 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5388 Fn->setDoesNotRecurse(); 
5389 CodeGenFunction CGF(CGM); 5390 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5391 5392 // Dst = (void*[n])(LHSArg); 5393 // Src = (void*[n])(RHSArg); 5394 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5395 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5396 ArgsType), CGF.getPointerAlign()); 5397 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5398 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5399 ArgsType), CGF.getPointerAlign()); 5400 5401 // ... 5402 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5403 // ... 5404 CodeGenFunction::OMPPrivateScope Scope(CGF); 5405 auto IPriv = Privates.begin(); 5406 unsigned Idx = 0; 5407 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5408 const auto *RHSVar = 5409 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5410 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5411 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5412 }); 5413 const auto *LHSVar = 5414 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5415 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5416 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5417 }); 5418 QualType PrivTy = (*IPriv)->getType(); 5419 if (PrivTy->isVariablyModifiedType()) { 5420 // Get array size and emit VLA type. 
5421 ++Idx; 5422 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5423 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5424 const VariableArrayType *VLA = 5425 CGF.getContext().getAsVariableArrayType(PrivTy); 5426 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5427 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5428 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5429 CGF.EmitVariablyModifiedType(PrivTy); 5430 } 5431 } 5432 Scope.Privatize(); 5433 IPriv = Privates.begin(); 5434 auto ILHS = LHSExprs.begin(); 5435 auto IRHS = RHSExprs.begin(); 5436 for (const Expr *E : ReductionOps) { 5437 if ((*IPriv)->getType()->isArrayType()) { 5438 // Emit reduction for array section. 5439 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5440 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5441 EmitOMPAggregateReduction( 5442 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5443 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5444 emitReductionCombiner(CGF, E); 5445 }); 5446 } else { 5447 // Emit reduction for array subscript or single variable. 5448 emitReductionCombiner(CGF, E); 5449 } 5450 ++IPriv; 5451 ++ILHS; 5452 ++IRHS; 5453 } 5454 Scope.ForceCleanup(); 5455 CGF.FinishFunction(); 5456 return Fn; 5457 } 5458 5459 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5460 const Expr *ReductionOp, 5461 const Expr *PrivateRef, 5462 const DeclRefExpr *LHS, 5463 const DeclRefExpr *RHS) { 5464 if (PrivateRef->getType()->isArrayType()) { 5465 // Emit reduction for array section. 
5466 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5467 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5468 EmitOMPAggregateReduction( 5469 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5470 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5471 emitReductionCombiner(CGF, ReductionOp); 5472 }); 5473 } else { 5474 // Emit reduction for array subscript or single variable. 5475 emitReductionCombiner(CGF, ReductionOp); 5476 } 5477 } 5478 5479 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5480 ArrayRef<const Expr *> Privates, 5481 ArrayRef<const Expr *> LHSExprs, 5482 ArrayRef<const Expr *> RHSExprs, 5483 ArrayRef<const Expr *> ReductionOps, 5484 ReductionOptionsTy Options) { 5485 if (!CGF.HaveInsertPoint()) 5486 return; 5487 5488 bool WithNowait = Options.WithNowait; 5489 bool SimpleReduction = Options.SimpleReduction; 5490 5491 // Next code should be emitted for reduction: 5492 // 5493 // static kmp_critical_name lock = { 0 }; 5494 // 5495 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5496 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5497 // ... 5498 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5499 // *(Type<n>-1*)rhs[<n>-1]); 5500 // } 5501 // 5502 // ... 5503 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5504 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5505 // RedList, reduce_func, &<lock>)) { 5506 // case 1: 5507 // ... 5508 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5509 // ... 5510 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5511 // break; 5512 // case 2: 5513 // ... 5514 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5515 // ... 5516 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5517 // break; 5518 // default:; 5519 // } 5520 // 5521 // if SimpleReduction is true, only the next code is generated: 5522 // ... 
5523 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5524 // ... 5525 5526 ASTContext &C = CGM.getContext(); 5527 5528 if (SimpleReduction) { 5529 CodeGenFunction::RunCleanupsScope Scope(CGF); 5530 auto IPriv = Privates.begin(); 5531 auto ILHS = LHSExprs.begin(); 5532 auto IRHS = RHSExprs.begin(); 5533 for (const Expr *E : ReductionOps) { 5534 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5535 cast<DeclRefExpr>(*IRHS)); 5536 ++IPriv; 5537 ++ILHS; 5538 ++IRHS; 5539 } 5540 return; 5541 } 5542 5543 // 1. Build a list of reduction variables. 5544 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5545 auto Size = RHSExprs.size(); 5546 for (const Expr *E : Privates) { 5547 if (E->getType()->isVariablyModifiedType()) 5548 // Reserve place for array size. 5549 ++Size; 5550 } 5551 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5552 QualType ReductionArrayTy = 5553 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5554 /*IndexTypeQuals=*/0); 5555 Address ReductionList = 5556 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5557 auto IPriv = Privates.begin(); 5558 unsigned Idx = 0; 5559 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5560 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5561 CGF.Builder.CreateStore( 5562 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5563 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5564 Elem); 5565 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5566 // Store array size. 5567 ++Idx; 5568 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5569 llvm::Value *Size = CGF.Builder.CreateIntCast( 5570 CGF.getVLASize( 5571 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5572 .NumElts, 5573 CGF.SizeTy, /*isSigned=*/false); 5574 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5575 Elem); 5576 } 5577 } 5578 5579 // 2. 
Emit reduce_func(). 5580 llvm::Function *ReductionFn = emitReductionFunction( 5581 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5582 LHSExprs, RHSExprs, ReductionOps); 5583 5584 // 3. Create static kmp_critical_name lock = { 0 }; 5585 std::string Name = getName({"reduction"}); 5586 llvm::Value *Lock = getCriticalRegionLock(Name); 5587 5588 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5589 // RedList, reduce_func, &<lock>); 5590 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5591 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5592 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5593 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5594 ReductionList.getPointer(), CGF.VoidPtrTy); 5595 llvm::Value *Args[] = { 5596 IdentTLoc, // ident_t *<loc> 5597 ThreadId, // i32 <gtid> 5598 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5599 ReductionArrayTySize, // size_type sizeof(RedList) 5600 RL, // void *RedList 5601 ReductionFn, // void (*) (void *, void *) <reduce_func> 5602 Lock // kmp_critical_name *&<lock> 5603 }; 5604 llvm::Value *Res = CGF.EmitRuntimeCall( 5605 OMPBuilder.getOrCreateRuntimeFunction( 5606 CGM.getModule(), 5607 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5608 Args); 5609 5610 // 5. Build switch(res) 5611 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5612 llvm::SwitchInst *SwInst = 5613 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5614 5615 // 6. Build case 1: 5616 // ... 5617 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5618 // ... 
5619 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5620 // break; 5621 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5622 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5623 CGF.EmitBlock(Case1BB); 5624 5625 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5626 llvm::Value *EndArgs[] = { 5627 IdentTLoc, // ident_t *<loc> 5628 ThreadId, // i32 <gtid> 5629 Lock // kmp_critical_name *&<lock> 5630 }; 5631 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5632 CodeGenFunction &CGF, PrePostActionTy &Action) { 5633 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5634 auto IPriv = Privates.begin(); 5635 auto ILHS = LHSExprs.begin(); 5636 auto IRHS = RHSExprs.begin(); 5637 for (const Expr *E : ReductionOps) { 5638 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5639 cast<DeclRefExpr>(*IRHS)); 5640 ++IPriv; 5641 ++ILHS; 5642 ++IRHS; 5643 } 5644 }; 5645 RegionCodeGenTy RCG(CodeGen); 5646 CommonActionTy Action( 5647 nullptr, llvm::None, 5648 OMPBuilder.getOrCreateRuntimeFunction( 5649 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5650 : OMPRTL___kmpc_end_reduce), 5651 EndArgs); 5652 RCG.setAction(Action); 5653 RCG(CGF); 5654 5655 CGF.EmitBranch(DefaultBB); 5656 5657 // 7. Build case 2: 5658 // ... 5659 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5660 // ... 
5661 // break; 5662 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5663 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5664 CGF.EmitBlock(Case2BB); 5665 5666 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5667 CodeGenFunction &CGF, PrePostActionTy &Action) { 5668 auto ILHS = LHSExprs.begin(); 5669 auto IRHS = RHSExprs.begin(); 5670 auto IPriv = Privates.begin(); 5671 for (const Expr *E : ReductionOps) { 5672 const Expr *XExpr = nullptr; 5673 const Expr *EExpr = nullptr; 5674 const Expr *UpExpr = nullptr; 5675 BinaryOperatorKind BO = BO_Comma; 5676 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5677 if (BO->getOpcode() == BO_Assign) { 5678 XExpr = BO->getLHS(); 5679 UpExpr = BO->getRHS(); 5680 } 5681 } 5682 // Try to emit update expression as a simple atomic. 5683 const Expr *RHSExpr = UpExpr; 5684 if (RHSExpr) { 5685 // Analyze RHS part of the whole expression. 5686 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5687 RHSExpr->IgnoreParenImpCasts())) { 5688 // If this is a conditional operator, analyze its condition for 5689 // min/max reduction operator. 
5690 RHSExpr = ACO->getCond(); 5691 } 5692 if (const auto *BORHS = 5693 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5694 EExpr = BORHS->getRHS(); 5695 BO = BORHS->getOpcode(); 5696 } 5697 } 5698 if (XExpr) { 5699 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5700 auto &&AtomicRedGen = [BO, VD, 5701 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5702 const Expr *EExpr, const Expr *UpExpr) { 5703 LValue X = CGF.EmitLValue(XExpr); 5704 RValue E; 5705 if (EExpr) 5706 E = CGF.EmitAnyExpr(EExpr); 5707 CGF.EmitOMPAtomicSimpleUpdateExpr( 5708 X, E, BO, /*IsXLHSInRHSPart=*/true, 5709 llvm::AtomicOrdering::Monotonic, Loc, 5710 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5711 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5712 PrivateScope.addPrivate( 5713 VD, [&CGF, VD, XRValue, Loc]() { 5714 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5715 CGF.emitOMPSimpleStore( 5716 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5717 VD->getType().getNonReferenceType(), Loc); 5718 return LHSTemp; 5719 }); 5720 (void)PrivateScope.Privatize(); 5721 return CGF.EmitAnyExpr(UpExpr); 5722 }); 5723 }; 5724 if ((*IPriv)->getType()->isArrayType()) { 5725 // Emit atomic reduction for array section. 5726 const auto *RHSVar = 5727 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5728 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5729 AtomicRedGen, XExpr, EExpr, UpExpr); 5730 } else { 5731 // Emit atomic reduction for array subscript or single variable. 5732 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5733 } 5734 } else { 5735 // Emit as a critical region. 
5736 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5737 const Expr *, const Expr *) { 5738 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5739 std::string Name = RT.getName({"atomic_reduction"}); 5740 RT.emitCriticalRegion( 5741 CGF, Name, 5742 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5743 Action.Enter(CGF); 5744 emitReductionCombiner(CGF, E); 5745 }, 5746 Loc); 5747 }; 5748 if ((*IPriv)->getType()->isArrayType()) { 5749 const auto *LHSVar = 5750 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5751 const auto *RHSVar = 5752 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5753 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5754 CritRedGen); 5755 } else { 5756 CritRedGen(CGF, nullptr, nullptr, nullptr); 5757 } 5758 } 5759 ++ILHS; 5760 ++IRHS; 5761 ++IPriv; 5762 } 5763 }; 5764 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5765 if (!WithNowait) { 5766 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5767 llvm::Value *EndArgs[] = { 5768 IdentTLoc, // ident_t *<loc> 5769 ThreadId, // i32 <gtid> 5770 Lock // kmp_critical_name *&<lock> 5771 }; 5772 CommonActionTy Action(nullptr, llvm::None, 5773 OMPBuilder.getOrCreateRuntimeFunction( 5774 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5775 EndArgs); 5776 AtomicRCG.setAction(Action); 5777 AtomicRCG(CGF); 5778 } else { 5779 AtomicRCG(CGF); 5780 } 5781 5782 CGF.EmitBranch(DefaultBB); 5783 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5784 } 5785 5786 /// Generates unique name for artificial threadprivate variables. 5787 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5788 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5789 const Expr *Ref) { 5790 SmallString<256> Buffer; 5791 llvm::raw_svector_ostream Out(Buffer); 5792 const clang::DeclRefExpr *DE; 5793 const VarDecl *D = ::getBaseDecl(Ref, DE); 5794 if (!D) 5795 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5796 D = D->getCanonicalDecl(); 5797 std::string Name = CGM.getOpenMPRuntime().getName( 5798 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5799 Out << Prefix << Name << "_" 5800 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5801 return std::string(Out.str()); 5802 } 5803 5804 /// Emits reduction initializer function: 5805 /// \code 5806 /// void @.red_init(void* %arg, void* %orig) { 5807 /// %0 = bitcast void* %arg to <type>* 5808 /// store <type> <init>, <type>* %0 5809 /// ret void 5810 /// } 5811 /// \endcode 5812 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5813 SourceLocation Loc, 5814 ReductionCodeGen &RCG, unsigned N) { 5815 ASTContext &C = CGM.getContext(); 5816 QualType VoidPtrTy = C.VoidPtrTy; 5817 VoidPtrTy.addRestrict(); 5818 FunctionArgList Args; 5819 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5820 ImplicitParamDecl::Other); 5821 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5822 ImplicitParamDecl::Other); 5823 Args.emplace_back(&Param); 5824 Args.emplace_back(&ParamOrig); 5825 const auto &FnInfo = 5826 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5827 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5828 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5829 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5830 Name, &CGM.getModule()); 5831 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5832 Fn->setDoesNotRecurse(); 5833 CodeGenFunction CGF(CGM); 5834 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5835 Address PrivateAddr = CGF.EmitLoadOfPointer( 5836 CGF.GetAddrOfLocalVar(&Param), 5837 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5838 llvm::Value *Size = nullptr; 5839 // If the size of the reduction item is non-constant, load it from global 5840 // threadprivate variable. 5841 if (RCG.getSizes(N).second) { 5842 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5843 CGF, CGM.getContext().getSizeType(), 5844 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5845 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5846 CGM.getContext().getSizeType(), Loc); 5847 } 5848 RCG.emitAggregateType(CGF, N, Size); 5849 LValue OrigLVal; 5850 // If initializer uses initializer from declare reduction construct, emit a 5851 // pointer to the address of the original reduction item (reuired by reduction 5852 // initializer) 5853 if (RCG.usesReductionInitializer(N)) { 5854 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5855 SharedAddr = CGF.EmitLoadOfPointer( 5856 SharedAddr, 5857 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5858 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5859 } else { 5860 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5861 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5862 CGM.getContext().VoidPtrTy); 5863 } 5864 // Emit the initializer: 5865 // %0 = bitcast void* %arg to <type>* 5866 // store <type> <init>, <type>* %0 5867 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5868 [](CodeGenFunction &) { return false; }); 5869 CGF.FinishFunction(); 5870 return Fn; 5871 } 5872 5873 /// Emits reduction combiner function: 5874 /// \code 5875 /// void @.red_comb(void* %arg0, void* %arg1) { 5876 /// %lhs = bitcast void* %arg0 to <type>* 5877 /// %rhs = bitcast void* %arg1 to <type>* 5878 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5879 /// store <type> %2, <type>* %lhs 5880 /// 
ret void 5881 /// } 5882 /// \endcode 5883 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5884 SourceLocation Loc, 5885 ReductionCodeGen &RCG, unsigned N, 5886 const Expr *ReductionOp, 5887 const Expr *LHS, const Expr *RHS, 5888 const Expr *PrivateRef) { 5889 ASTContext &C = CGM.getContext(); 5890 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5891 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5892 FunctionArgList Args; 5893 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5894 C.VoidPtrTy, ImplicitParamDecl::Other); 5895 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5896 ImplicitParamDecl::Other); 5897 Args.emplace_back(&ParamInOut); 5898 Args.emplace_back(&ParamIn); 5899 const auto &FnInfo = 5900 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5901 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5902 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5903 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5904 Name, &CGM.getModule()); 5905 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5906 Fn->setDoesNotRecurse(); 5907 CodeGenFunction CGF(CGM); 5908 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5909 llvm::Value *Size = nullptr; 5910 // If the size of the reduction item is non-constant, load it from global 5911 // threadprivate variable. 5912 if (RCG.getSizes(N).second) { 5913 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5914 CGF, CGM.getContext().getSizeType(), 5915 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5916 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5917 CGM.getContext().getSizeType(), Loc); 5918 } 5919 RCG.emitAggregateType(CGF, N, Size); 5920 // Remap lhs and rhs variables to the addresses of the function arguments. 
5921 // %lhs = bitcast void* %arg0 to <type>* 5922 // %rhs = bitcast void* %arg1 to <type>* 5923 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5924 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5925 // Pull out the pointer to the variable. 5926 Address PtrAddr = CGF.EmitLoadOfPointer( 5927 CGF.GetAddrOfLocalVar(&ParamInOut), 5928 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5929 return CGF.Builder.CreateElementBitCast( 5930 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5931 }); 5932 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5933 // Pull out the pointer to the variable. 5934 Address PtrAddr = CGF.EmitLoadOfPointer( 5935 CGF.GetAddrOfLocalVar(&ParamIn), 5936 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5937 return CGF.Builder.CreateElementBitCast( 5938 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5939 }); 5940 PrivateScope.Privatize(); 5941 // Emit the combiner body: 5942 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5943 // store <type> %2, <type>* %lhs 5944 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5945 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5946 cast<DeclRefExpr>(RHS)); 5947 CGF.FinishFunction(); 5948 return Fn; 5949 } 5950 5951 /// Emits reduction finalizer function: 5952 /// \code 5953 /// void @.red_fini(void* %arg) { 5954 /// %0 = bitcast void* %arg to <type>* 5955 /// <destroy>(<type>* %0) 5956 /// ret void 5957 /// } 5958 /// \endcode 5959 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5960 SourceLocation Loc, 5961 ReductionCodeGen &RCG, unsigned N) { 5962 if (!RCG.needCleanups(N)) 5963 return nullptr; 5964 ASTContext &C = CGM.getContext(); 5965 FunctionArgList Args; 5966 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5967 ImplicitParamDecl::Other); 5968 Args.emplace_back(&Param); 5969 const auto &FnInfo = 5970 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5971 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5972 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5973 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5974 Name, &CGM.getModule()); 5975 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5976 Fn->setDoesNotRecurse(); 5977 CodeGenFunction CGF(CGM); 5978 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5979 Address PrivateAddr = CGF.EmitLoadOfPointer( 5980 CGF.GetAddrOfLocalVar(&Param), 5981 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5982 llvm::Value *Size = nullptr; 5983 // If the size of the reduction item is non-constant, load it from global 5984 // threadprivate variable. 5985 if (RCG.getSizes(N).second) { 5986 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5987 CGF, CGM.getContext().getSizeType(), 5988 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5989 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5990 CGM.getContext().getSizeType(), Loc); 5991 } 5992 RCG.emitAggregateType(CGF, N, Size); 5993 // Emit the finalizer body: 5994 // <destroy>(<type>* %0) 5995 RCG.emitCleanups(CGF, N, PrivateAddr); 5996 CGF.FinishFunction(Loc); 5997 return Fn; 5998 } 5999 6000 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6001 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6002 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6003 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6004 return nullptr; 6005 6006 // Build typedef struct: 6007 // kmp_taskred_input { 6008 // void *reduce_shar; // shared reduction item 6009 // void *reduce_orig; // original reduction item used for initialization 6010 // size_t reduce_size; // size of data item 6011 // void *reduce_init; // data initialization routine 6012 // void *reduce_fini; // data finalization routine 6013 // void *reduce_comb; // data combiner routine 6014 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6015 // } kmp_taskred_input_t; 6016 ASTContext &C = CGM.getContext(); 6017 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6018 RD->startDefinition(); 6019 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6020 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6021 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6022 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6023 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6024 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6025 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6026 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6027 RD->completeDefinition(); 6028 QualType RDType = C.getRecordType(RD); 6029 unsigned Size = Data.ReductionVars.size(); 6030 llvm::APInt ArraySize(/*numBits=*/64, Size); 6031 QualType ArrayRDType = C.getConstantArrayType( 6032 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6033 // kmp_task_red_input_t .rd_input.[Size]; 6034 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6035 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6036 Data.ReductionCopies, Data.ReductionOps); 6037 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6038 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6039 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6040 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6041 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6042 TaskRedInput.getPointer(), Idxs, 6043 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6044 ".rd_input.gep."); 6045 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6046 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6047 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6048 RCG.emitSharedOrigLValue(CGF, Cnt); 6049 llvm::Value *CastedShared = 6050 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6051 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6052 // ElemLVal.reduce_orig = &Origs[Cnt]; 6053 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6054 llvm::Value *CastedOrig = 6055 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6056 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6057 RCG.emitAggregateType(CGF, Cnt); 6058 llvm::Value *SizeValInChars; 6059 llvm::Value *SizeVal; 6060 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6061 // We use delayed creation/initialization for VLAs and array sections. It is 6062 // required because runtime does not provide the way to pass the sizes of 6063 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6064 // threadprivate global variables are used to store these values and use 6065 // them in the functions. 6066 bool DelayedCreation = !!SizeVal; 6067 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6068 /*isSigned=*/false); 6069 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6070 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6071 // ElemLVal.reduce_init = init; 6072 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6073 llvm::Value *InitAddr = 6074 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6075 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6076 // ElemLVal.reduce_fini = fini; 6077 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6078 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6079 llvm::Value *FiniAddr = Fini 6080 ? 
CGF.EmitCastToVoidPtr(Fini) 6081 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6082 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6083 // ElemLVal.reduce_comb = comb; 6084 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6085 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6086 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6087 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6088 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6089 // ElemLVal.flags = 0; 6090 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6091 if (DelayedCreation) { 6092 CGF.EmitStoreOfScalar( 6093 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6094 FlagsLVal); 6095 } else 6096 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6097 FlagsLVal.getType()); 6098 } 6099 if (Data.IsReductionWithTaskMod) { 6100 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6101 // is_ws, int num, void *data); 6102 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6103 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6104 CGM.IntTy, /*isSigned=*/true); 6105 llvm::Value *Args[] = { 6106 IdentTLoc, GTid, 6107 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6108 /*isSigned=*/true), 6109 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6110 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6111 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6112 return CGF.EmitRuntimeCall( 6113 OMPBuilder.getOrCreateRuntimeFunction( 6114 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6115 Args); 6116 } 6117 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6118 llvm::Value *Args[] = { 6119 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6120 /*isSigned=*/true), 6121 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6122 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6123 CGM.VoidPtrTy)}; 6124 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6125 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6126 Args); 6127 } 6128 6129 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6130 SourceLocation Loc, 6131 bool IsWorksharingReduction) { 6132 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6133 // is_ws, int num, void *data); 6134 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6135 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6136 CGM.IntTy, /*isSigned=*/true); 6137 llvm::Value *Args[] = {IdentTLoc, GTid, 6138 llvm::ConstantInt::get(CGM.IntTy, 6139 IsWorksharingReduction ? 1 : 0, 6140 /*isSigned=*/true)}; 6141 (void)CGF.EmitRuntimeCall( 6142 OMPBuilder.getOrCreateRuntimeFunction( 6143 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6144 Args); 6145 } 6146 6147 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6148 SourceLocation Loc, 6149 ReductionCodeGen &RCG, 6150 unsigned N) { 6151 auto Sizes = RCG.getSizes(N); 6152 // Emit threadprivate global variable if the type is non-constant 6153 // (Sizes.second = nullptr). 
6154 if (Sizes.second) { 6155 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6156 /*isSigned=*/false); 6157 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6158 CGF, CGM.getContext().getSizeType(), 6159 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6160 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6161 } 6162 } 6163 6164 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6165 SourceLocation Loc, 6166 llvm::Value *ReductionsPtr, 6167 LValue SharedLVal) { 6168 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6169 // *d); 6170 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6171 CGM.IntTy, 6172 /*isSigned=*/true), 6173 ReductionsPtr, 6174 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6175 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6176 return Address( 6177 CGF.EmitRuntimeCall( 6178 OMPBuilder.getOrCreateRuntimeFunction( 6179 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6180 Args), 6181 SharedLVal.getAlignment()); 6182 } 6183 6184 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6185 SourceLocation Loc) { 6186 if (!CGF.HaveInsertPoint()) 6187 return; 6188 6189 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6190 OMPBuilder.CreateTaskwait(CGF.Builder); 6191 } else { 6192 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6193 // global_tid); 6194 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6195 // Ignore return result until untied tasks are supported. 
6196 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6197 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6198 Args); 6199 } 6200 6201 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6202 Region->emitUntiedSwitch(CGF); 6203 } 6204 6205 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6206 OpenMPDirectiveKind InnerKind, 6207 const RegionCodeGenTy &CodeGen, 6208 bool HasCancel) { 6209 if (!CGF.HaveInsertPoint()) 6210 return; 6211 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6212 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6213 } 6214 6215 namespace { 6216 enum RTCancelKind { 6217 CancelNoreq = 0, 6218 CancelParallel = 1, 6219 CancelLoop = 2, 6220 CancelSections = 3, 6221 CancelTaskgroup = 4 6222 }; 6223 } // anonymous namespace 6224 6225 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6226 RTCancelKind CancelKind = CancelNoreq; 6227 if (CancelRegion == OMPD_parallel) 6228 CancelKind = CancelParallel; 6229 else if (CancelRegion == OMPD_for) 6230 CancelKind = CancelLoop; 6231 else if (CancelRegion == OMPD_sections) 6232 CancelKind = CancelSections; 6233 else { 6234 assert(CancelRegion == OMPD_taskgroup); 6235 CancelKind = CancelTaskgroup; 6236 } 6237 return CancelKind; 6238 } 6239 6240 void CGOpenMPRuntime::emitCancellationPointCall( 6241 CodeGenFunction &CGF, SourceLocation Loc, 6242 OpenMPDirectiveKind CancelRegion) { 6243 if (!CGF.HaveInsertPoint()) 6244 return; 6245 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6246 // global_tid, kmp_int32 cncl_kind); 6247 if (auto *OMPRegionInfo = 6248 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6249 // For 'cancellation point taskgroup', the task region info may not have a 6250 // cancel. This may instead happen in another adjacent task. 
6251 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6252 llvm::Value *Args[] = { 6253 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6254 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6255 // Ignore return result until untied tasks are supported. 6256 llvm::Value *Result = CGF.EmitRuntimeCall( 6257 OMPBuilder.getOrCreateRuntimeFunction( 6258 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6259 Args); 6260 // if (__kmpc_cancellationpoint()) { 6261 // exit from construct; 6262 // } 6263 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6264 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6265 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6266 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6267 CGF.EmitBlock(ExitBB); 6268 // exit from construct; 6269 CodeGenFunction::JumpDest CancelDest = 6270 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6271 CGF.EmitBranchThroughCleanup(CancelDest); 6272 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6273 } 6274 } 6275 } 6276 6277 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6278 const Expr *IfCond, 6279 OpenMPDirectiveKind CancelRegion) { 6280 if (!CGF.HaveInsertPoint()) 6281 return; 6282 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6283 // kmp_int32 cncl_kind); 6284 auto &M = CGM.getModule(); 6285 if (auto *OMPRegionInfo = 6286 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6287 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6288 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6289 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6290 llvm::Value *Args[] = { 6291 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6292 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6293 // Ignore return result until untied tasks are supported. 
6294 llvm::Value *Result = CGF.EmitRuntimeCall( 6295 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6296 // if (__kmpc_cancel()) { 6297 // exit from construct; 6298 // } 6299 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6300 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6301 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6302 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6303 CGF.EmitBlock(ExitBB); 6304 // exit from construct; 6305 CodeGenFunction::JumpDest CancelDest = 6306 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6307 CGF.EmitBranchThroughCleanup(CancelDest); 6308 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6309 }; 6310 if (IfCond) { 6311 emitIfClause(CGF, IfCond, ThenGen, 6312 [](CodeGenFunction &, PrePostActionTy &) {}); 6313 } else { 6314 RegionCodeGenTy ThenRCG(ThenGen); 6315 ThenRCG(CGF); 6316 } 6317 } 6318 } 6319 6320 namespace { 6321 /// Cleanup action for uses_allocators support. 
6322 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6323 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6324 6325 public: 6326 OMPUsesAllocatorsActionTy( 6327 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6328 : Allocators(Allocators) {} 6329 void Enter(CodeGenFunction &CGF) override { 6330 if (!CGF.HaveInsertPoint()) 6331 return; 6332 for (const auto &AllocatorData : Allocators) { 6333 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6334 CGF, AllocatorData.first, AllocatorData.second); 6335 } 6336 } 6337 void Exit(CodeGenFunction &CGF) override { 6338 if (!CGF.HaveInsertPoint()) 6339 return; 6340 for (const auto &AllocatorData : Allocators) { 6341 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6342 AllocatorData.first); 6343 } 6344 } 6345 }; 6346 } // namespace 6347 6348 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6349 const OMPExecutableDirective &D, StringRef ParentName, 6350 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6351 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6352 assert(!ParentName.empty() && "Invalid target region parent name!"); 6353 HasEmittedTargetRegion = true; 6354 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6355 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6356 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6357 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6358 if (!D.AllocatorTraits) 6359 continue; 6360 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6361 } 6362 } 6363 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6364 CodeGen.setAction(UsesAllocatorAction); 6365 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6366 IsOffloadEntry, CodeGen); 6367 } 6368 6369 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6370 const Expr *Allocator, 6371 const Expr *AllocatorTraits) { 6372 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6373 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6374 // Use default memspace handle. 6375 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6376 llvm::Value *NumTraits = llvm::ConstantInt::get( 6377 CGF.IntTy, cast<ConstantArrayType>( 6378 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6379 ->getSize() 6380 .getLimitedValue()); 6381 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6382 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6383 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6384 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6385 AllocatorTraitsLVal.getBaseInfo(), 6386 AllocatorTraitsLVal.getTBAAInfo()); 6387 llvm::Value *Traits = 6388 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6389 6390 llvm::Value *AllocatorVal = 6391 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6392 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6393 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6394 // Store to allocator. 
6395 CGF.EmitVarDecl(*cast<VarDecl>( 6396 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6397 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6398 AllocatorVal = 6399 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6400 Allocator->getType(), Allocator->getExprLoc()); 6401 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6402 } 6403 6404 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6405 const Expr *Allocator) { 6406 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6407 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6408 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6409 llvm::Value *AllocatorVal = 6410 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6411 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6412 CGF.getContext().VoidPtrTy, 6413 Allocator->getExprLoc()); 6414 (void)CGF.EmitRuntimeCall( 6415 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6416 OMPRTL___kmpc_destroy_allocator), 6417 {ThreadId, AllocatorVal}); 6418 } 6419 6420 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6421 const OMPExecutableDirective &D, StringRef ParentName, 6422 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6423 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6424 // Create a unique name for the entry function using the source location 6425 // information of the current target region. The name will be something like: 6426 // 6427 // __omp_offloading_DD_FFFF_PP_lBB 6428 // 6429 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6430 // mangled name of the function that encloses the target region and BB is the 6431 // line number of the target region. 
6432 6433 unsigned DeviceID; 6434 unsigned FileID; 6435 unsigned Line; 6436 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6437 Line); 6438 SmallString<64> EntryFnName; 6439 { 6440 llvm::raw_svector_ostream OS(EntryFnName); 6441 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6442 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6443 } 6444 6445 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6446 6447 CodeGenFunction CGF(CGM, true); 6448 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6449 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6450 6451 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6452 6453 // If this target outline function is not an offload entry, we don't need to 6454 // register it. 6455 if (!IsOffloadEntry) 6456 return; 6457 6458 // The target region ID is used by the runtime library to identify the current 6459 // target region, so it only has to be unique and not necessarily point to 6460 // anything. It could be the pointer to the outlined function that implements 6461 // the target region, but we aren't using that so that the compiler doesn't 6462 // need to keep that, and could therefore inline the host function if proven 6463 // worthwhile during optimization. In the other hand, if emitting code for the 6464 // device, the ID has to be the function address so that it can retrieved from 6465 // the offloading entry and launched by the runtime library. We also mark the 6466 // outlined function to have external linkage in case we are emitting code for 6467 // the device, because these functions will be entry points to the device. 
6468 6469 if (CGM.getLangOpts().OpenMPIsDevice) { 6470 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6471 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6472 OutlinedFn->setDSOLocal(false); 6473 } else { 6474 std::string Name = getName({EntryFnName, "region_id"}); 6475 OutlinedFnID = new llvm::GlobalVariable( 6476 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6477 llvm::GlobalValue::WeakAnyLinkage, 6478 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6479 } 6480 6481 // Register the information for the entry associated with this target region. 6482 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6483 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6484 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6485 } 6486 6487 /// Checks if the expression is constant or does not have non-trivial function 6488 /// calls. 6489 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6490 // We can skip constant expressions. 6491 // We can skip expressions with trivial calls or simple expressions. 6492 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6493 !E->hasNonTrivialCall(Ctx)) && 6494 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6495 } 6496 6497 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6498 const Stmt *Body) { 6499 const Stmt *Child = Body->IgnoreContainers(); 6500 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6501 Child = nullptr; 6502 for (const Stmt *S : C->body()) { 6503 if (const auto *E = dyn_cast<Expr>(S)) { 6504 if (isTrivial(Ctx, E)) 6505 continue; 6506 } 6507 // Some of the statements can be ignored. 6508 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6509 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6510 continue; 6511 // Analyze declarations. 
6512 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6513 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6514 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6515 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6516 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6517 isa<UsingDirectiveDecl>(D) || 6518 isa<OMPDeclareReductionDecl>(D) || 6519 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6520 return true; 6521 const auto *VD = dyn_cast<VarDecl>(D); 6522 if (!VD) 6523 return false; 6524 return VD->isConstexpr() || 6525 ((VD->getType().isTrivialType(Ctx) || 6526 VD->getType()->isReferenceType()) && 6527 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6528 })) 6529 continue; 6530 } 6531 // Found multiple children - cannot get the one child only. 6532 if (Child) 6533 return nullptr; 6534 Child = S; 6535 } 6536 if (Child) 6537 Child = Child->IgnoreContainers(); 6538 } 6539 return Child; 6540 } 6541 6542 /// Emit the number of teams for a target directive. Inspect the num_teams 6543 /// clause associated with a teams construct combined or closely nested 6544 /// with the target directive. 6545 /// 6546 /// Emit a team of size one for directives such as 'target parallel' that 6547 /// have no associated teams construct. 6548 /// 6549 /// Otherwise, return nullptr. 
6550 static llvm::Value * 6551 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6552 const OMPExecutableDirective &D) { 6553 assert(!CGF.getLangOpts().OpenMPIsDevice && 6554 "Clauses associated with the teams directive expected to be emitted " 6555 "only for the host!"); 6556 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6557 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6558 "Expected target-based executable directive."); 6559 CGBuilderTy &Bld = CGF.Builder; 6560 switch (DirectiveKind) { 6561 case OMPD_target: { 6562 const auto *CS = D.getInnermostCapturedStmt(); 6563 const auto *Body = 6564 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6565 const Stmt *ChildStmt = 6566 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6567 if (const auto *NestedDir = 6568 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6569 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6570 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6571 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6572 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6573 const Expr *NumTeams = 6574 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6575 llvm::Value *NumTeamsVal = 6576 CGF.EmitScalarExpr(NumTeams, 6577 /*IgnoreResultAssign*/ true); 6578 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6579 /*isSigned=*/true); 6580 } 6581 return Bld.getInt32(0); 6582 } 6583 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6584 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6585 return Bld.getInt32(1); 6586 return Bld.getInt32(0); 6587 } 6588 return nullptr; 6589 } 6590 case OMPD_target_teams: 6591 case OMPD_target_teams_distribute: 6592 case OMPD_target_teams_distribute_simd: 6593 case OMPD_target_teams_distribute_parallel_for: 6594 case OMPD_target_teams_distribute_parallel_for_simd: { 6595 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6596 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6597 const Expr *NumTeams = 6598 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6599 llvm::Value *NumTeamsVal = 6600 CGF.EmitScalarExpr(NumTeams, 6601 /*IgnoreResultAssign*/ true); 6602 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6603 /*isSigned=*/true); 6604 } 6605 return Bld.getInt32(0); 6606 } 6607 case OMPD_target_parallel: 6608 case OMPD_target_parallel_for: 6609 case OMPD_target_parallel_for_simd: 6610 case OMPD_target_simd: 6611 return Bld.getInt32(1); 6612 case OMPD_parallel: 6613 case OMPD_for: 6614 case OMPD_parallel_for: 6615 case OMPD_parallel_master: 6616 case OMPD_parallel_sections: 6617 case OMPD_for_simd: 6618 case OMPD_parallel_for_simd: 6619 case OMPD_cancel: 6620 case OMPD_cancellation_point: 6621 case OMPD_ordered: 6622 case OMPD_threadprivate: 6623 case OMPD_allocate: 6624 case OMPD_task: 6625 case OMPD_simd: 6626 case OMPD_sections: 6627 case OMPD_section: 6628 case OMPD_single: 6629 case OMPD_master: 6630 case OMPD_critical: 6631 case OMPD_taskyield: 6632 case OMPD_barrier: 6633 case OMPD_taskwait: 6634 case OMPD_taskgroup: 6635 case OMPD_atomic: 6636 case OMPD_flush: 6637 case OMPD_depobj: 6638 case OMPD_scan: 6639 case OMPD_teams: 6640 case OMPD_target_data: 6641 case OMPD_target_exit_data: 6642 case OMPD_target_enter_data: 6643 case OMPD_distribute: 6644 case OMPD_distribute_simd: 6645 case OMPD_distribute_parallel_for: 6646 case OMPD_distribute_parallel_for_simd: 6647 case OMPD_teams_distribute: 6648 case OMPD_teams_distribute_simd: 6649 case OMPD_teams_distribute_parallel_for: 6650 case OMPD_teams_distribute_parallel_for_simd: 6651 case OMPD_target_update: 6652 case OMPD_declare_simd: 6653 case OMPD_declare_variant: 6654 case OMPD_begin_declare_variant: 6655 case OMPD_end_declare_variant: 6656 case OMPD_declare_target: 6657 case OMPD_end_declare_target: 6658 case OMPD_declare_reduction: 6659 case OMPD_declare_mapper: 6660 case OMPD_taskloop: 6661 case OMPD_taskloop_simd: 6662 case OMPD_master_taskloop: 
6663 case OMPD_master_taskloop_simd: 6664 case OMPD_parallel_master_taskloop: 6665 case OMPD_parallel_master_taskloop_simd: 6666 case OMPD_requires: 6667 case OMPD_unknown: 6668 break; 6669 default: 6670 break; 6671 } 6672 llvm_unreachable("Unexpected directive kind."); 6673 } 6674 6675 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6676 llvm::Value *DefaultThreadLimitVal) { 6677 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6678 CGF.getContext(), CS->getCapturedStmt()); 6679 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6680 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6681 llvm::Value *NumThreads = nullptr; 6682 llvm::Value *CondVal = nullptr; 6683 // Handle if clause. If if clause present, the number of threads is 6684 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6685 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6686 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6687 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6688 const OMPIfClause *IfClause = nullptr; 6689 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6690 if (C->getNameModifier() == OMPD_unknown || 6691 C->getNameModifier() == OMPD_parallel) { 6692 IfClause = C; 6693 break; 6694 } 6695 } 6696 if (IfClause) { 6697 const Expr *Cond = IfClause->getCondition(); 6698 bool Result; 6699 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6700 if (!Result) 6701 return CGF.Builder.getInt32(1); 6702 } else { 6703 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6704 if (const auto *PreInit = 6705 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6706 for (const auto *I : PreInit->decls()) { 6707 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6708 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6709 } else { 6710 CodeGenFunction::AutoVarEmission Emission = 6711 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6712 CGF.EmitAutoVarCleanups(Emission); 6713 } 6714 } 6715 } 6716 
CondVal = CGF.EvaluateExprAsBool(Cond); 6717 } 6718 } 6719 } 6720 // Check the value of num_threads clause iff if clause was not specified 6721 // or is not evaluated to false. 6722 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6723 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6724 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6725 const auto *NumThreadsClause = 6726 Dir->getSingleClause<OMPNumThreadsClause>(); 6727 CodeGenFunction::LexicalScope Scope( 6728 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6729 if (const auto *PreInit = 6730 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6731 for (const auto *I : PreInit->decls()) { 6732 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6733 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6734 } else { 6735 CodeGenFunction::AutoVarEmission Emission = 6736 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6737 CGF.EmitAutoVarCleanups(Emission); 6738 } 6739 } 6740 } 6741 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6742 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6743 /*isSigned=*/false); 6744 if (DefaultThreadLimitVal) 6745 NumThreads = CGF.Builder.CreateSelect( 6746 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6747 DefaultThreadLimitVal, NumThreads); 6748 } else { 6749 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6750 : CGF.Builder.getInt32(0); 6751 } 6752 // Process condition of the if clause. 6753 if (CondVal) { 6754 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6755 CGF.Builder.getInt32(1)); 6756 } 6757 return NumThreads; 6758 } 6759 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6760 return CGF.Builder.getInt32(1); 6761 return DefaultThreadLimitVal; 6762 } 6763 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6764 : CGF.Builder.getInt32(0); 6765 } 6766 6767 /// Emit the number of threads for a target directive. 
Inspect the 6768 /// thread_limit clause associated with a teams construct combined or closely 6769 /// nested with the target directive. 6770 /// 6771 /// Emit the num_threads clause for directives such as 'target parallel' that 6772 /// have no associated teams construct. 6773 /// 6774 /// Otherwise, return nullptr. 6775 static llvm::Value * 6776 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6777 const OMPExecutableDirective &D) { 6778 assert(!CGF.getLangOpts().OpenMPIsDevice && 6779 "Clauses associated with the teams directive expected to be emitted " 6780 "only for the host!"); 6781 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6782 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6783 "Expected target-based executable directive."); 6784 CGBuilderTy &Bld = CGF.Builder; 6785 llvm::Value *ThreadLimitVal = nullptr; 6786 llvm::Value *NumThreadsVal = nullptr; 6787 switch (DirectiveKind) { 6788 case OMPD_target: { 6789 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6790 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6791 return NumThreads; 6792 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6793 CGF.getContext(), CS->getCapturedStmt()); 6794 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6795 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6796 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6797 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6798 const auto *ThreadLimitClause = 6799 Dir->getSingleClause<OMPThreadLimitClause>(); 6800 CodeGenFunction::LexicalScope Scope( 6801 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6802 if (const auto *PreInit = 6803 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6804 for (const auto *I : PreInit->decls()) { 6805 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6806 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6807 } else { 6808 CodeGenFunction::AutoVarEmission Emission = 6809 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6810 CGF.EmitAutoVarCleanups(Emission); 6811 } 6812 } 6813 } 6814 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6815 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6816 ThreadLimitVal = 6817 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6818 } 6819 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6820 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6821 CS = Dir->getInnermostCapturedStmt(); 6822 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6823 CGF.getContext(), CS->getCapturedStmt()); 6824 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6825 } 6826 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6827 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6828 CS = Dir->getInnermostCapturedStmt(); 6829 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6830 return NumThreads; 6831 } 6832 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6833 return Bld.getInt32(1); 6834 } 6835 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6836 } 6837 case OMPD_target_teams: { 6838 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6839 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6840 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6841 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6842 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6843 ThreadLimitVal = 6844 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6845 } 6846 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6847 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6848 return NumThreads; 6849 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6850 CGF.getContext(), CS->getCapturedStmt()); 6851 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6852 if (Dir->getDirectiveKind() == OMPD_distribute) { 6853 CS = Dir->getInnermostCapturedStmt(); 6854 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6855 return NumThreads; 6856 } 6857 } 6858 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6859 } 6860 case OMPD_target_teams_distribute: 6861 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6862 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6863 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6864 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6865 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6866 ThreadLimitVal = 6867 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6868 } 6869 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6870 case OMPD_target_parallel: 6871 case OMPD_target_parallel_for: 6872 case OMPD_target_parallel_for_simd: 6873 case OMPD_target_teams_distribute_parallel_for: 6874 case OMPD_target_teams_distribute_parallel_for_simd: { 6875 llvm::Value *CondVal = nullptr; 6876 // Handle if clause. 
If if clause present, the number of threads is 6877 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6878 if (D.hasClausesOfKind<OMPIfClause>()) { 6879 const OMPIfClause *IfClause = nullptr; 6880 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6881 if (C->getNameModifier() == OMPD_unknown || 6882 C->getNameModifier() == OMPD_parallel) { 6883 IfClause = C; 6884 break; 6885 } 6886 } 6887 if (IfClause) { 6888 const Expr *Cond = IfClause->getCondition(); 6889 bool Result; 6890 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6891 if (!Result) 6892 return Bld.getInt32(1); 6893 } else { 6894 CodeGenFunction::RunCleanupsScope Scope(CGF); 6895 CondVal = CGF.EvaluateExprAsBool(Cond); 6896 } 6897 } 6898 } 6899 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6900 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6901 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6902 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6903 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6904 ThreadLimitVal = 6905 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6906 } 6907 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6908 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6909 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6910 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6911 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6912 NumThreadsVal = 6913 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6914 ThreadLimitVal = ThreadLimitVal 6915 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6916 ThreadLimitVal), 6917 NumThreadsVal, ThreadLimitVal) 6918 : NumThreadsVal; 6919 } 6920 if (!ThreadLimitVal) 6921 ThreadLimitVal = Bld.getInt32(0); 6922 if (CondVal) 6923 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6924 return ThreadLimitVal; 6925 } 6926 case OMPD_target_teams_distribute_simd: 6927 case OMPD_target_simd: 6928 return Bld.getInt32(1); 6929 case OMPD_parallel: 6930 case OMPD_for: 6931 case OMPD_parallel_for: 6932 case OMPD_parallel_master: 6933 case OMPD_parallel_sections: 6934 case OMPD_for_simd: 6935 case OMPD_parallel_for_simd: 6936 case OMPD_cancel: 6937 case OMPD_cancellation_point: 6938 case OMPD_ordered: 6939 case OMPD_threadprivate: 6940 case OMPD_allocate: 6941 case OMPD_task: 6942 case OMPD_simd: 6943 case OMPD_sections: 6944 case OMPD_section: 6945 case OMPD_single: 6946 case OMPD_master: 6947 case OMPD_critical: 6948 case OMPD_taskyield: 6949 case OMPD_barrier: 6950 case OMPD_taskwait: 6951 case OMPD_taskgroup: 6952 case OMPD_atomic: 6953 case OMPD_flush: 6954 case OMPD_depobj: 6955 case OMPD_scan: 6956 case OMPD_teams: 6957 case OMPD_target_data: 6958 case OMPD_target_exit_data: 6959 case OMPD_target_enter_data: 6960 case OMPD_distribute: 6961 case OMPD_distribute_simd: 6962 case OMPD_distribute_parallel_for: 6963 case OMPD_distribute_parallel_for_simd: 6964 case OMPD_teams_distribute: 6965 case OMPD_teams_distribute_simd: 6966 case OMPD_teams_distribute_parallel_for: 6967 case OMPD_teams_distribute_parallel_for_simd: 6968 case OMPD_target_update: 6969 case OMPD_declare_simd: 6970 case OMPD_declare_variant: 6971 case OMPD_begin_declare_variant: 6972 case OMPD_end_declare_variant: 6973 case OMPD_declare_target: 6974 case OMPD_end_declare_target: 6975 case OMPD_declare_reduction: 6976 case OMPD_declare_mapper: 6977 case OMPD_taskloop: 6978 case OMPD_taskloop_simd: 6979 case OMPD_master_taskloop: 6980 case OMPD_master_taskloop_simd: 6981 case 
OMPD_parallel_master_taskloop: 6982 case OMPD_parallel_master_taskloop_simd: 6983 case OMPD_requires: 6984 case OMPD_unknown: 6985 break; 6986 default: 6987 break; 6988 } 6989 llvm_unreachable("Unsupported directive kind."); 6990 } 6991 6992 namespace { 6993 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6994 6995 // Utility to handle information from clauses associated with a given 6996 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6997 // It provides a convenient interface to obtain the information and generate 6998 // code for that information. 6999 class MappableExprsHandler { 7000 public: 7001 /// Values for bit flags used to specify the mapping type for 7002 /// offloading. 7003 enum OpenMPOffloadMappingFlags : uint64_t { 7004 /// No flags 7005 OMP_MAP_NONE = 0x0, 7006 /// Allocate memory on the device and move data from host to device. 7007 OMP_MAP_TO = 0x01, 7008 /// Allocate memory on the device and move data from device to host. 7009 OMP_MAP_FROM = 0x02, 7010 /// Always perform the requested mapping action on the element, even 7011 /// if it was already mapped before. 7012 OMP_MAP_ALWAYS = 0x04, 7013 /// Delete the element from the device environment, ignoring the 7014 /// current reference count associated with the element. 7015 OMP_MAP_DELETE = 0x08, 7016 /// The element being mapped is a pointer-pointee pair; both the 7017 /// pointer and the pointee should be mapped. 7018 OMP_MAP_PTR_AND_OBJ = 0x10, 7019 /// This flags signals that the base address of an entry should be 7020 /// passed to the target kernel as an argument. 7021 OMP_MAP_TARGET_PARAM = 0x20, 7022 /// Signal that the runtime library has to return the device pointer 7023 /// in the current position for the data being mapped. Used when we have the 7024 /// use_device_ptr or use_device_addr clause. 7025 OMP_MAP_RETURN_PARAM = 0x40, 7026 /// This flag signals that the reference being passed is a pointer to 7027 /// private data. 
7028 OMP_MAP_PRIVATE = 0x80, 7029 /// Pass the element to the device by value. 7030 OMP_MAP_LITERAL = 0x100, 7031 /// Implicit map 7032 OMP_MAP_IMPLICIT = 0x200, 7033 /// Close is a hint to the runtime to allocate memory close to 7034 /// the target device. 7035 OMP_MAP_CLOSE = 0x400, 7036 /// 0x800 is reserved for compatibility with XLC. 7037 /// Produce a runtime error if the data is not already allocated. 7038 OMP_MAP_PRESENT = 0x1000, 7039 /// Signal that the runtime library should use args as an array of 7040 /// descriptor_dim pointers and use args_size as dims. Used when we have 7041 /// non-contiguous list items in target update directive 7042 OMP_MAP_NON_CONTIG = 0x100000000000, 7043 /// The 16 MSBs of the flags indicate whether the entry is member of some 7044 /// struct/class. 7045 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7046 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7047 }; 7048 7049 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7050 static unsigned getFlagMemberOffset() { 7051 unsigned Offset = 0; 7052 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7053 Remain = Remain >> 1) 7054 Offset++; 7055 return Offset; 7056 } 7057 7058 /// Class that associates information with a base pointer to be passed to the 7059 /// runtime library. 7060 class BasePointerInfo { 7061 /// The base pointer. 7062 llvm::Value *Ptr = nullptr; 7063 /// The base declaration that refers to this device pointer, or null if 7064 /// there is none. 
7065 const ValueDecl *DevPtrDecl = nullptr; 7066 7067 public: 7068 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7069 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7070 llvm::Value *operator*() const { return Ptr; } 7071 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7072 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7073 }; 7074 7075 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7076 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7077 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7078 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7079 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7080 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7081 7082 /// This structure contains combined information generated for mappable 7083 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7084 /// mappers, and non-contiguous information. 7085 struct MapCombinedInfoTy { 7086 struct StructNonContiguousInfo { 7087 bool IsNonContiguous = false; 7088 MapDimArrayTy Dims; 7089 MapNonContiguousArrayTy Offsets; 7090 MapNonContiguousArrayTy Counts; 7091 MapNonContiguousArrayTy Strides; 7092 }; 7093 MapBaseValuesArrayTy BasePointers; 7094 MapValuesArrayTy Pointers; 7095 MapValuesArrayTy Sizes; 7096 MapFlagsArrayTy Types; 7097 MapMappersArrayTy Mappers; 7098 StructNonContiguousInfo NonContigInfo; 7099 7100 /// Append arrays in \a CurInfo. 
7101 void append(MapCombinedInfoTy &CurInfo) { 7102 BasePointers.append(CurInfo.BasePointers.begin(), 7103 CurInfo.BasePointers.end()); 7104 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7105 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7106 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7107 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7108 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7109 CurInfo.NonContigInfo.Dims.end()); 7110 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7111 CurInfo.NonContigInfo.Offsets.end()); 7112 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7113 CurInfo.NonContigInfo.Counts.end()); 7114 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7115 CurInfo.NonContigInfo.Strides.end()); 7116 } 7117 }; 7118 7119 /// Map between a struct and the its lowest & highest elements which have been 7120 /// mapped. 7121 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7122 /// HE(FieldIndex, Pointer)} 7123 struct StructRangeInfoTy { 7124 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7125 0, Address::invalid()}; 7126 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7127 0, Address::invalid()}; 7128 Address Base = Address::invalid(); 7129 }; 7130 7131 private: 7132 /// Kind that defines how a device pointer has to be returned. 
7133 struct MapInfo { 7134 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7135 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7136 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7137 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7138 bool ReturnDevicePointer = false; 7139 bool IsImplicit = false; 7140 const ValueDecl *Mapper = nullptr; 7141 bool ForDeviceAddr = false; 7142 7143 MapInfo() = default; 7144 MapInfo( 7145 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7146 OpenMPMapClauseKind MapType, 7147 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7148 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7149 bool ReturnDevicePointer, bool IsImplicit, 7150 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false) 7151 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7152 MotionModifiers(MotionModifiers), 7153 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7154 Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {} 7155 }; 7156 7157 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7158 /// member and there is no map information about it, then emission of that 7159 /// entry is deferred until the whole struct has been processed. 7160 struct DeferredDevicePtrEntryTy { 7161 const Expr *IE = nullptr; 7162 const ValueDecl *VD = nullptr; 7163 bool ForDeviceAddr = false; 7164 7165 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7166 bool ForDeviceAddr) 7167 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7168 }; 7169 7170 /// The target directive from where the mappable clauses were extracted. It 7171 /// is either a executable directive or a user-defined mapper directive. 7172 llvm::PointerUnion<const OMPExecutableDirective *, 7173 const OMPDeclareMapperDecl *> 7174 CurDir; 7175 7176 /// Function the directive is being generated for. 7177 CodeGenFunction &CGF; 7178 7179 /// Set of all first private variables in the current directive. 
7180 /// bool data is set to true if the variable is implicitly marked as 7181 /// firstprivate, false otherwise. 7182 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7183 7184 /// Map between device pointer declarations and their expression components. 7185 /// The key value for declarations in 'this' is null. 7186 llvm::DenseMap< 7187 const ValueDecl *, 7188 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7189 DevPointersMap; 7190 7191 llvm::Value *getExprTypeSize(const Expr *E) const { 7192 QualType ExprTy = E->getType().getCanonicalType(); 7193 7194 // Calculate the size for array shaping expression. 7195 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7196 llvm::Value *Size = 7197 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7198 for (const Expr *SE : OAE->getDimensions()) { 7199 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7200 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7201 CGF.getContext().getSizeType(), 7202 SE->getExprLoc()); 7203 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7204 } 7205 return Size; 7206 } 7207 7208 // Reference types are ignored for mapping purposes. 7209 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7210 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7211 7212 // Given that an array section is considered a built-in type, we need to 7213 // do the calculation based on the length of the section instead of relying 7214 // on CGF.getTypeSize(E->getType()). 7215 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7216 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7217 OAE->getBase()->IgnoreParenImpCasts()) 7218 .getCanonicalType(); 7219 7220 // If there is no length associated with the expression and lower bound is 7221 // not specified too, that means we are using the whole length of the 7222 // base. 
7223 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7224 !OAE->getLowerBound()) 7225 return CGF.getTypeSize(BaseTy); 7226 7227 llvm::Value *ElemSize; 7228 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7229 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7230 } else { 7231 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7232 assert(ATy && "Expecting array type if not a pointer type."); 7233 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7234 } 7235 7236 // If we don't have a length at this point, that is because we have an 7237 // array section with a single element. 7238 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7239 return ElemSize; 7240 7241 if (const Expr *LenExpr = OAE->getLength()) { 7242 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7243 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7244 CGF.getContext().getSizeType(), 7245 LenExpr->getExprLoc()); 7246 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7247 } 7248 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7249 OAE->getLowerBound() && "expected array_section[lb:]."); 7250 // Size = sizetype - lb * elemtype; 7251 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7252 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7253 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7254 CGF.getContext().getSizeType(), 7255 OAE->getLowerBound()->getExprLoc()); 7256 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7257 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7258 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7259 LengthVal = CGF.Builder.CreateSelect( 7260 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7261 return LengthVal; 7262 } 7263 return CGF.getTypeSize(ExprTy); 7264 } 7265 7266 /// Return the corresponding bits for a given map clause modifier. 
Add 7267 /// a flag marking the map as a pointer if requested. Add a flag marking the 7268 /// map as the first one of a series of maps that relate to the same map 7269 /// expression. 7270 OpenMPOffloadMappingFlags getMapTypeBits( 7271 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7272 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7273 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7274 OpenMPOffloadMappingFlags Bits = 7275 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7276 switch (MapType) { 7277 case OMPC_MAP_alloc: 7278 case OMPC_MAP_release: 7279 // alloc and release is the default behavior in the runtime library, i.e. 7280 // if we don't pass any bits alloc/release that is what the runtime is 7281 // going to do. Therefore, we don't need to signal anything for these two 7282 // type modifiers. 7283 break; 7284 case OMPC_MAP_to: 7285 Bits |= OMP_MAP_TO; 7286 break; 7287 case OMPC_MAP_from: 7288 Bits |= OMP_MAP_FROM; 7289 break; 7290 case OMPC_MAP_tofrom: 7291 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7292 break; 7293 case OMPC_MAP_delete: 7294 Bits |= OMP_MAP_DELETE; 7295 break; 7296 case OMPC_MAP_unknown: 7297 llvm_unreachable("Unexpected map type!"); 7298 } 7299 if (AddPtrFlag) 7300 Bits |= OMP_MAP_PTR_AND_OBJ; 7301 if (AddIsTargetParamFlag) 7302 Bits |= OMP_MAP_TARGET_PARAM; 7303 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7304 != MapModifiers.end()) 7305 Bits |= OMP_MAP_ALWAYS; 7306 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7307 != MapModifiers.end()) 7308 Bits |= OMP_MAP_CLOSE; 7309 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7310 != MapModifiers.end()) 7311 Bits |= OMP_MAP_PRESENT; 7312 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7313 != MotionModifiers.end()) 7314 Bits |= OMP_MAP_PRESENT; 7315 if (IsNonContiguous) 7316 Bits |= OMP_MAP_NON_CONTIG; 7317 return Bits; 7318 } 7319 7320 /// Return true if the provided expression is a 
final array section. A 7321 /// final array section, is one whose length can't be proved to be one. 7322 bool isFinalArraySectionExpression(const Expr *E) const { 7323 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7324 7325 // It is not an array section and therefore not a unity-size one. 7326 if (!OASE) 7327 return false; 7328 7329 // An array section with no colon always refer to a single element. 7330 if (OASE->getColonLocFirst().isInvalid()) 7331 return false; 7332 7333 const Expr *Length = OASE->getLength(); 7334 7335 // If we don't have a length we have to check if the array has size 1 7336 // for this dimension. Also, we should always expect a length if the 7337 // base type is pointer. 7338 if (!Length) { 7339 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7340 OASE->getBase()->IgnoreParenImpCasts()) 7341 .getCanonicalType(); 7342 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7343 return ATy->getSize().getSExtValue() != 1; 7344 // If we don't have a constant dimension length, we have to consider 7345 // the current section as having any size, so it is not necessarily 7346 // unitary. If it happen to be unity size, that's user fault. 7347 return true; 7348 } 7349 7350 // Check if the length evaluates to 1. 7351 Expr::EvalResult Result; 7352 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7353 return true; // Can have more that size 1. 7354 7355 llvm::APSInt ConstLength = Result.Val.getInt(); 7356 return ConstLength.getSExtValue() != 1; 7357 } 7358 7359 /// Generate the base pointers, section pointers, sizes, map type bits, and 7360 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7361 /// map type, map or motion modifiers, and expression components. 7362 /// \a IsFirstComponent should be set to true if the provided set of 7363 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
    bool IsFirstComponentList, bool IsImplicit,
    const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
  // in unified shared memory mode or for local pointers
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  bool FirstPointerInComplexData = false;
  Address BP = Address::invalid();
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  // Compute the initial base pointer BP from the first (base) component.
  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      // No need to generate individual map information for the pointer, it
      // can be associated with the combined storage if shared memory mode is
      // active or the base declaration is not global variable.
      const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
      if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
          !VD || VD->hasLocalStorage())
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
      else
        FirstPointerInComplexData = true;
      ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  // Track the total number of dimensions. Start from one for the dummy
  // dimension.
  uint64_t DimSize = 1;

  bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME) {
        ShouldBeMemberOf = true;
        // Do not emit as complex pointer if this is actually not array-like
        // expression.
        if (FirstPointerInComplexData) {
          QualType Ty = std::prev(I)
                            ->getAssociatedDeclaration()
                            ->getType()
                            .getNonReferenceType();
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
          FirstPointerInComplexData = false;
        }
      }
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section, is one whose length can't be proved to be one.
    // If the map item is non-contiguous then we don't treat any array section
    // as final array section.
    bool IsFinalArraySection =
        !IsNonContiguous &&
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // Note: these per-component OASE/OAShE declarations shadow the
    // function-scope variables of the same name computed for the base
    // component above.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

    if (OASE)
      ++DimSize;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the section pointer (lower bound address) of this component.
      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointerOrAddr =
          (IsPointer || ForDeviceAddr) && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty()) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(Next == CE &&
               "Expected last element for the overlapped elements.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last byte of the struct (LB + sizeof - 1).
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false, IsNonContiguous);
        LB = BP;
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              // Size of the gap between the current position and the
              // overlapped component.
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          assert(Size && "Failed to determine structure size");
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          // Continue right after the overlapped component.
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Emit the trailing chunk, from the last overlapped component up to
        // one past HB.
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.Types.push_back(Flags);
        CombinedInfo.Mappers.push_back(nullptr);
        CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                  : 1);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointerOrAddr) {
        CombinedInfo.BasePointers.push_back(BP.getPointer());
        CombinedInfo.Pointers.push_back(LB.getPointer());
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                  : 1);

        // If Mapper is valid, the last component inherits the mapper.
        bool HasMapper = Mapper && Next == CE;
        CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, MotionModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference ||
                FirstPointerInComplexData,
            IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        CombinedInfo.Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress(CGF);
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
          PartialStruct.Base = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      FirstPointerInComplexData = false;
    }
  }

  // Everything below only collects offset/count/stride information for
  // non-contiguous map items.
  if (!IsNonContiguous)
    return;

  const ASTContext &Context = CGF.getContext();

  // For supporting stride in array section, we need to initialize the first
  // dimension size as 1, first offset as 0, and first count as 1
  MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
  MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  MapValuesArrayTy CurStrides;
  MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  // NOTE(review): ElementTypeSize has no initializer and is only assigned in
  // the loop below once a constant/variable array typed base is seen, yet it
  // is read unconditionally when DimProd is created afterwards. Confirm that
  // every non-contiguous component list reaching this point contains such an
  // array-typed section.
  uint64_t ElementTypeSize;

  // Collect Size information for each dimension and get the element size as
  // the first Stride. For example, for `int arr[10][10]`, the DimSizes
  // should be [10, 10] and the first stride is 4 bytes.
  for (const OMPClauseMappableExprCommon::MappableComponent &Component :
       Components) {
    const Expr *AssocExpr = Component.getAssociatedExpression();
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (!OASE)
      continue;

    QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
    auto *CAT = Context.getAsConstantArrayType(Ty);
    auto *VAT = Context.getAsVariableArrayType(Ty);

    // We need all the dimension size except for the last dimension.
    assert((VAT || CAT || &Component == &*Components.begin()) &&
           "Should be either ConstantArray or VariableArray if not the "
           "first Component");

    // Get element size if CurStrides is empty.
    if (CurStrides.empty()) {
      const Type *ElementType = nullptr;
      if (CAT)
        ElementType = CAT->getElementType().getTypePtr();
      else if (VAT)
        ElementType = VAT->getElementType().getTypePtr();
      else
        assert(&Component == &*Components.begin() &&
               "Only expect pointer (non CAT or VAT) when this is the "
               "first Component");
      // If ElementType is null, then it means the base is a pointer
      // (neither CAT nor VAT) and we'll attempt to get ElementType again
      // for next iteration.
      if (ElementType) {
        // For the case that having pointer as base, we need to remove one
        // level of indirection.
        if (&Component != &*Components.begin())
          ElementType = ElementType->getPointeeOrArrayElementType();
        ElementTypeSize =
            Context.getTypeSizeInChars(ElementType).getQuantity();
        CurStrides.push_back(
            llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
      }
    }
    // Get dimension value except for the last dimension since we don't need
    // it.
    if (DimSizes.size() < Components.size() - 1) {
      if (CAT)
        DimSizes.push_back(llvm::ConstantInt::get(
            CGF.Int64Ty, CAT->getSize().getZExtValue()));
      else if (VAT)
        DimSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
            /*IsSigned=*/false));
    }
  }

  // Skip the dummy dimension since we already have its information.
  auto DI = DimSizes.begin() + 1;
  // Product of dimension.
  llvm::Value *DimProd =
      llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

  // Collect info for non-contiguous. Notice that offset, count, and stride
  // are only meaningful for array-section, so we insert a null for anything
  // other than array-section.
  // Also, the size of offset, count, and stride are not the same as
  // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
  // count, and stride are the same as the number of non-contiguous
  // declaration in target update to/from clause.
  for (const OMPClauseMappableExprCommon::MappableComponent &Component :
       Components) {
    const Expr *AssocExpr = Component.getAssociatedExpression();

    // A plain subscript contributes a single element: offset = index,
    // count = 1, stride = the previous stride.
    if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
      llvm::Value *Offset = CGF.Builder.CreateIntCast(
          CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
          /*isSigned=*/false);
      CurOffsets.push_back(Offset);
      CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
      CurStrides.push_back(CurStrides.back());
      continue;
    }

    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (!OASE)
      continue;

    // Offset
    const Expr *OffsetExpr = OASE->getLowerBound();
    llvm::Value *Offset = nullptr;
    if (!OffsetExpr) {
      // If offset is absent, then we just set it to zero.
      Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
    } else {
      Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                         CGF.Int64Ty,
                                         /*isSigned=*/false);
    }
    CurOffsets.push_back(Offset);

    // Count
    const Expr *CountExpr = OASE->getLength();
    llvm::Value *Count = nullptr;
    if (!CountExpr) {
      // In Clang, once a high dimension is an array section, we construct all
      // the lower dimension as array section, however, for case like
      // arr[0:2][2], Clang construct the inner dimension as an array section
      // but it actually is not in an array section form according to spec.
      if (!OASE->getColonLocFirst().isValid() &&
          !OASE->getColonLocSecond().isValid()) {
        Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
      } else {
        // OpenMP 5.0, 2.1.5 Array Sections, Description.
        // When the length is absent it defaults to ⌈(size −
        // lower-bound)/stride⌉, where size is the size of the array
        // dimension.
        const Expr *StrideExpr = OASE->getStride();
        llvm::Value *Stride =
            StrideExpr
                ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                            CGF.Int64Ty, /*isSigned=*/false)
                : nullptr;
        if (Stride)
          Count = CGF.Builder.CreateUDiv(
              CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
        else
          Count = CGF.Builder.CreateNUWSub(*DI, Offset);
      }
    } else {
      Count = CGF.EmitScalarExpr(CountExpr);
    }
    Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
    CurCounts.push_back(Count);

    // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
    // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
    //              Offset      Count     Stride
    //    D0          0           1         4    (int)    <- dummy dimension
    //    D1          0           2         8    (2 * (1) * 4)
    //    D2          1           2         20   (1 * (1 * 5) * 4)
    //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
    const Expr *StrideExpr = OASE->getStride();
    llvm::Value *Stride =
        StrideExpr
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                        CGF.Int64Ty, /*isSigned=*/false)
            : nullptr;
    DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
    if (Stride)
      CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
    else
      CurStrides.push_back(DimProd);
    if (DI != DimSizes.end())
      ++DI;
  }

  CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
  CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
  CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant variables captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointers: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  // Not a known firstprivate: TO | FROM.
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift by getFlagMemberOffset() bits.
8074 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8075 << getFlagMemberOffset()); 8076 } 8077 8078 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8079 OpenMPOffloadMappingFlags MemberOfFlag) { 8080 // If the entry is PTR_AND_OBJ but has not been marked with the special 8081 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8082 // marked as MEMBER_OF. 8083 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8084 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8085 return; 8086 8087 // Reset the placeholder value to prepare the flag for the assignment of the 8088 // proper MEMBER_OF value. 8089 Flags &= ~OMP_MAP_MEMBER_OF; 8090 Flags |= MemberOfFlag; 8091 } 8092 8093 void getPlainLayout(const CXXRecordDecl *RD, 8094 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8095 bool AsBase) const { 8096 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8097 8098 llvm::StructType *St = 8099 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8100 8101 unsigned NumElements = St->getNumElements(); 8102 llvm::SmallVector< 8103 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8104 RecordLayout(NumElements); 8105 8106 // Fill bases. 8107 for (const auto &I : RD->bases()) { 8108 if (I.isVirtual()) 8109 continue; 8110 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8111 // Ignore empty bases. 8112 if (Base->isEmpty() || CGF.getContext() 8113 .getASTRecordLayout(Base) 8114 .getNonVirtualSize() 8115 .isZero()) 8116 continue; 8117 8118 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8119 RecordLayout[FieldIndex] = Base; 8120 } 8121 // Fill in virtual bases. 8122 for (const auto &I : RD->vbases()) { 8123 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8124 // Ignore empty bases. 
8125 if (Base->isEmpty()) 8126 continue; 8127 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8128 if (RecordLayout[FieldIndex]) 8129 continue; 8130 RecordLayout[FieldIndex] = Base; 8131 } 8132 // Fill in all the fields. 8133 assert(!RD->isUnion() && "Unexpected union."); 8134 for (const auto *Field : RD->fields()) { 8135 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8136 // will fill in later.) 8137 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8138 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8139 RecordLayout[FieldIndex] = Field; 8140 } 8141 } 8142 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8143 &Data : RecordLayout) { 8144 if (Data.isNull()) 8145 continue; 8146 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8147 getPlainLayout(Base, Layout, /*AsBase=*/true); 8148 else 8149 Layout.push_back(Data.get<const FieldDecl *>()); 8150 } 8151 } 8152 8153 public: 8154 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8155 : CurDir(&Dir), CGF(CGF) { 8156 // Extract firstprivate clause information. 8157 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8158 for (const auto *D : C->varlists()) 8159 FirstPrivateDecls.try_emplace( 8160 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8161 // Extract implicit firstprivates from uses_allocators clauses. 
8162 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8163 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8164 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8165 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8166 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8167 /*Implicit=*/true); 8168 else if (const auto *VD = dyn_cast<VarDecl>( 8169 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8170 ->getDecl())) 8171 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8172 } 8173 } 8174 // Extract device pointer clause information. 8175 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8176 for (auto L : C->component_lists()) 8177 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8178 } 8179 8180 /// Constructor for the declare mapper directive. 8181 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8182 : CurDir(&Dir), CGF(CGF) {} 8183 8184 /// Generate code for the combined entry if we have a partially mapped struct 8185 /// and take care of the mapping flags of the arguments corresponding to 8186 /// individual struct members. 8187 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8188 MapFlagsArrayTy &CurTypes, 8189 const StructRangeInfoTy &PartialStruct, 8190 bool NotTargetParams = false) const { 8191 // Base is the base of the struct 8192 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8193 // Pointer is the address of the lowest element 8194 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8195 CombinedInfo.Pointers.push_back(LB); 8196 // There should not be a mapper for a combined entry. 
8197 CombinedInfo.Mappers.push_back(nullptr); 8198 // Size is (addr of {highest+1} element) - (addr of lowest element) 8199 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8200 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8201 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8202 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8203 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8204 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8205 /*isSigned=*/false); 8206 CombinedInfo.Sizes.push_back(Size); 8207 // Map type is always TARGET_PARAM, if generate info for captures. 8208 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8209 : OMP_MAP_TARGET_PARAM); 8210 // If any element has the present modifier, then make sure the runtime 8211 // doesn't attempt to allocate the struct. 8212 if (CurTypes.end() != 8213 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8214 return Type & OMP_MAP_PRESENT; 8215 })) 8216 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8217 // Remove TARGET_PARAM flag from the first element 8218 CurTypes.front() &= ~OMP_MAP_TARGET_PARAM; 8219 8220 // All other current entries will be MEMBER_OF the combined entry 8221 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8222 // 0xFFFF in the MEMBER_OF field). 8223 OpenMPOffloadMappingFlags MemberOfFlag = 8224 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8225 for (auto &M : CurTypes) 8226 setCorrectMemberOfFlag(M, MemberOfFlag); 8227 } 8228 8229 /// Generate all the base pointers, section pointers, sizes, map types, and 8230 /// mappers for the extracted mappable expressions (all included in \a 8231 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8232 /// pair of the relevant declaration and index where it occurs is appended to 8233 /// the device pointers info array. 
void generateAllInfo(
    MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. The key is the canonical declaration (null when no declaration
  // is given); declarations found in SkipVarSet are dropped.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          bool ForDeviceAddr = false) {
        const ValueDecl *VD =
            D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
        if (SkipVarSet.count(VD))
          return;
        Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                              ReturnDevicePointer, IsImplicit, Mapper,
                              ForDeviceAddr);
      };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // 'map' clauses: keep the clause's own map type and map-type modifiers;
  // there are no motion modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
    }
  // 'to' clauses are recorded as map type 'to' with the clause's motion
  // modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L));
    }
  // 'from' clauses are recorded as map type 'from' with the clause's motion
  // modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L));
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  // Entries created for use_device_ptr items are collected separately and
  // appended to CombinedInfo last, after everything else.
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        // Exclude cases where the base pointer is mapped as array subscript,
        // array section or array shaping. The base address is passed as a
        // pointer to base in this case and cannot be used as a base for
        // use_device_ptr list item.
        if (CI != It->second.end()) {
          // PrevCI is the component that follows the base declaration in the
          // matched list (rend() when the list has a single component).
          auto PrevCI = std::next(CI->Components.rbegin());
          const auto *VarD = dyn_cast<VarDecl>(VD);
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              isa<MemberExpr>(IE) ||
              !VD->getType().getNonReferenceType()->isPointerType() ||
              PrevCI == CI->Components.rend() ||
              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
              VarD->hasLocalStorage()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
          // Otherwise fall through and emit a separate entry below.
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // Non-member pointer: the loaded pointer value serves as both base
        // pointer and pointer of a zero-size RETURN_PARAM entry.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      // Handle each declaration only once across all use_device_addr clauses.
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Use the address of a glvalue, or the value of the expression
        // otherwise. Unlike use_device_ptr, this entry goes straight into
        // CombinedInfo.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Emit the entries, one declaration at a time, in insertion order.
  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = !NotTargetParams;

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
      CurInfo.NonContigInfo.IsNonContiguous =
          L.Components.back().isNonContiguous();
      generateInfoForComponentList(L.MapType, L.MapModifiers,
                                   L.MotionModifiers, L.Components, CurInfo,
                                   PartialStruct, IsFirstComponentList,
                                   L.IsImplicit, L.Mapper, L.ForDeviceAddr);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        // The first entry generated for this component list is the one that
        // gets tagged.
        CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
            RelevantVD);
        CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr. Deferred entries are only
    // ever stored under the null key, so this matches the 'this' group only.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          // use_device_addr: base pointer and pointer are the same value.
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          // use_device_ptr: base is the address of the member, pointer is
          // the value loaded from it.
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                        NotTargetParams);

    // We need to append the results of this capture to what we already have.
    // The combined entry (if any) therefore precedes its members.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted map clauses of user-defined mapper (all included
/// in \a CombinedInfo).
///
/// Requires that this handler was built for a declare mapper directive
/// (asserted); every clause of that directive is treated as a map clause.
///
/// \param CombinedInfo Output; entries are appended.
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Fill the information map for map clauses.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto L : MC->component_lists()) {
      // Key by canonical declaration; null when the list has no declaration.
      const ValueDecl *VD =
          std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                         : nullptr;
      // The third tuple element is the corresponding user-defined mapper.
      Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                            MC->getMapTypeModifiers(), llvm::None,
                            /*ReturnDevicePointer=*/false, MC->isImplicit(),
                            std::get<2>(L));
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers,
                                   L.MotionModifiers, L.Components, CurInfo,
                                   PartialStruct, IsFirstComponentList,
                                   L.IsImplicit, L.Mapper, L.ForDeviceAddr);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      // NOTE(review): a 0 is appended to the per-entry Dims list alongside
      // the combined entry; presumably this keeps Dims in step with the
      // other arrays after append() - confirm against MapCombinedInfoTy.
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
}

/// Emit map entries for the captures of a lambda object: the captured 'this'
/// (if any), every variable captured by reference, and every pointer-typed
/// variable captured by copy. Nothing is emitted when the type of \a VD is
/// not a lambda class.
///
/// \param VD Declaration of the lambda object.
/// \param Arg Address of the lambda object.
/// \param CombinedInfo Output; one entry is appended per mapped capture.
/// \param LambdaPointers Receives, for each appended entry, a mapping from the
/// address of the capture field to the address of the lambda object.
8582 void generateInfoForLambdaCaptures( 8583 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8584 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8585 const auto *RD = VD->getType() 8586 .getCanonicalType() 8587 .getNonReferenceType() 8588 ->getAsCXXRecordDecl(); 8589 if (!RD || !RD->isLambda()) 8590 return; 8591 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8592 LValue VDLVal = CGF.MakeAddrLValue( 8593 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8594 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8595 FieldDecl *ThisCapture = nullptr; 8596 RD->getCaptureFields(Captures, ThisCapture); 8597 if (ThisCapture) { 8598 LValue ThisLVal = 8599 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8600 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8601 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8602 VDLVal.getPointer(CGF)); 8603 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8604 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8605 CombinedInfo.Sizes.push_back( 8606 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8607 CGF.Int64Ty, /*isSigned=*/true)); 8608 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8609 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8610 CombinedInfo.Mappers.push_back(nullptr); 8611 } 8612 for (const LambdaCapture &LC : RD->captures()) { 8613 if (!LC.capturesVariable()) 8614 continue; 8615 const VarDecl *VD = LC.getCapturedVar(); 8616 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8617 continue; 8618 auto It = Captures.find(VD); 8619 assert(It != Captures.end() && "Found lambda capture without field."); 8620 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8621 if (LC.getCaptureKind() == LCK_ByRef) { 8622 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8623 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8624 VDLVal.getPointer(CGF)); 8625 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8626 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8627 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8628 CGF.getTypeSize( 8629 VD->getType().getCanonicalType().getNonReferenceType()), 8630 CGF.Int64Ty, /*isSigned=*/true)); 8631 } else { 8632 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8633 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8634 VDLVal.getPointer(CGF)); 8635 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8636 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8637 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8638 } 8639 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8640 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8641 CombinedInfo.Mappers.push_back(nullptr); 8642 } 8643 } 8644 8645 /// Set correct indices for lambdas captures. 8646 void adjustMemberOfForLambdaCaptures( 8647 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8648 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8649 MapFlagsArrayTy &Types) const { 8650 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8651 // Set correct member_of idx for all implicit lambda captures. 
8652 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8653 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8654 continue; 8655 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8656 assert(BasePtr && "Unable to find base lambda address."); 8657 int TgtIdx = -1; 8658 for (unsigned J = I; J > 0; --J) { 8659 unsigned Idx = J - 1; 8660 if (Pointers[Idx] != BasePtr) 8661 continue; 8662 TgtIdx = Idx; 8663 break; 8664 } 8665 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8666 // All other current entries will be MEMBER_OF the combined entry 8667 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8668 // 0xFFFF in the MEMBER_OF field). 8669 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8670 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8671 } 8672 } 8673 8674 /// Generate the base pointers, section pointers, sizes, map types, and 8675 /// mappers associated to a given capture (all included in \a CombinedInfo). 8676 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8677 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8678 StructRangeInfoTy &PartialStruct) const { 8679 assert(!Cap->capturesVariableArrayType() && 8680 "Not expecting to generate map info for a variable array type!"); 8681 8682 // We need to know when we generating information for the first component 8683 const ValueDecl *VD = Cap->capturesThis() 8684 ? nullptr 8685 : Cap->getCapturedVar()->getCanonicalDecl(); 8686 8687 // If this declaration appears in a is_device_ptr clause we just have to 8688 // pass the pointer by value. If it is a reference to a declaration, we just 8689 // pass its value. 
8690 if (DevPointersMap.count(VD)) { 8691 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8692 CombinedInfo.Pointers.push_back(Arg); 8693 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8694 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8695 /*isSigned=*/true)); 8696 CombinedInfo.Types.push_back( 8697 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 8698 OMP_MAP_TARGET_PARAM); 8699 CombinedInfo.Mappers.push_back(nullptr); 8700 return; 8701 } 8702 8703 using MapData = 8704 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8705 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8706 const ValueDecl *>; 8707 SmallVector<MapData, 4> DeclComponentLists; 8708 assert(CurDir.is<const OMPExecutableDirective *>() && 8709 "Expect a executable directive"); 8710 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8711 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8712 for (const auto L : C->decl_component_lists(VD)) { 8713 const ValueDecl *VDecl, *Mapper; 8714 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8715 std::tie(VDecl, Components, Mapper) = L; 8716 assert(VDecl == VD && "We got information for the wrong declaration??"); 8717 assert(!Components.empty() && 8718 "Not expecting declaration with no component lists."); 8719 DeclComponentLists.emplace_back(Components, C->getMapType(), 8720 C->getMapTypeModifiers(), 8721 C->isImplicit(), Mapper); 8722 } 8723 } 8724 8725 // Find overlapping elements (including the offset from the base element). 
8726 llvm::SmallDenseMap< 8727 const MapData *, 8728 llvm::SmallVector< 8729 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8730 4> 8731 OverlappedData; 8732 size_t Count = 0; 8733 for (const MapData &L : DeclComponentLists) { 8734 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8735 OpenMPMapClauseKind MapType; 8736 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8737 bool IsImplicit; 8738 const ValueDecl *Mapper; 8739 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8740 ++Count; 8741 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8742 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8743 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1; 8744 auto CI = Components.rbegin(); 8745 auto CE = Components.rend(); 8746 auto SI = Components1.rbegin(); 8747 auto SE = Components1.rend(); 8748 for (; CI != CE && SI != SE; ++CI, ++SI) { 8749 if (CI->getAssociatedExpression()->getStmtClass() != 8750 SI->getAssociatedExpression()->getStmtClass()) 8751 break; 8752 // Are we dealing with different variables/fields? 8753 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8754 break; 8755 } 8756 // Found overlapping if, at least for one component, reached the head of 8757 // the components list. 8758 if (CI == CE || SI == SE) { 8759 assert((CI != CE || SI != SE) && 8760 "Unexpected full match of the mapping components."); 8761 const MapData &BaseData = CI == CE ? L : L1; 8762 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8763 SI == SE ? Components : Components1; 8764 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8765 OverlappedElements.getSecond().push_back(SubData); 8766 } 8767 } 8768 } 8769 // Sort the overlapped elements for each item. 
8770 llvm::SmallVector<const FieldDecl *, 4> Layout; 8771 if (!OverlappedData.empty()) { 8772 if (const auto *CRD = 8773 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8774 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8775 else { 8776 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8777 Layout.append(RD->field_begin(), RD->field_end()); 8778 } 8779 } 8780 for (auto &Pair : OverlappedData) { 8781 llvm::sort( 8782 Pair.getSecond(), 8783 [&Layout]( 8784 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8785 OMPClauseMappableExprCommon::MappableExprComponentListRef 8786 Second) { 8787 auto CI = First.rbegin(); 8788 auto CE = First.rend(); 8789 auto SI = Second.rbegin(); 8790 auto SE = Second.rend(); 8791 for (; CI != CE && SI != SE; ++CI, ++SI) { 8792 if (CI->getAssociatedExpression()->getStmtClass() != 8793 SI->getAssociatedExpression()->getStmtClass()) 8794 break; 8795 // Are we dealing with different variables/fields? 8796 if (CI->getAssociatedDeclaration() != 8797 SI->getAssociatedDeclaration()) 8798 break; 8799 } 8800 8801 // Lists contain the same elements. 8802 if (CI == CE && SI == SE) 8803 return false; 8804 8805 // List with less elements is less than list with more elements. 8806 if (CI == CE || SI == SE) 8807 return CI == CE; 8808 8809 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8810 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8811 if (FD1->getParent() == FD2->getParent()) 8812 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8813 const auto It = 8814 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8815 return FD == FD1 || FD == FD2; 8816 }); 8817 return *It == FD1; 8818 }); 8819 } 8820 8821 // Associated with a capture, because the mapping flags depend on it. 8822 // Go through all of the elements with the overlapped elements. 
for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to each of the BasePointers, Pointers,
  /// Sizes, Types and Mappers arrays of \a CombinedInfo. The new map type
  /// always carries OMP_MAP_TARGET_PARAM, and also OMP_MAP_IMPLICIT unless
  /// FirstPrivateDecls holds a false value for the captured variable.
  ///
  /// \param CI Capture being mapped: 'this', a by-copy capture, or a
  /// by-reference capture.
  /// \param RI Field describing the capture. Its type is cast to a pointer
  /// type for 'this' and to a reference type for by-reference captures.
  /// \param CV Captured value; used as the base pointer and, in most cases,
  /// as the pointer of the new entry.
  /// \param CombinedInfo Map information arrays that receive the new entry.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Default maps are implicit unless FirstPrivateDecls says otherwise below.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      // The mapped size is that of the object 'this' points to.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A variable recorded in FirstPrivateDecls overrides the implicit flag.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // The mapped size is that of the referenced object.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant variable recorded in FirstPrivateDecls: map the global
        // returned by registerTargetFirstprivateCopy instead of \a CV.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Pointer-typed variable recorded in FirstPrivateDecls: the mapped
          // pointer is the result of EmitLoadOfReference on \a CV, not \a CV.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the dimension descriptors used for non-contiguous mappings.
///
/// For every entry I of NonContigInfo.Dims whose value is not 1, a temporary
/// array of Dims[I] 'descriptor_dim' records is created and filled from
/// NonContigInfo.Offsets, Counts and Strides in reverse dimension order. The
/// address of that array is then stored into slot I of Info.PointersArray.
///
/// \param CGF Function being emitted; all temporaries and stores are created
/// through it.
/// \param CombinedInfo Supplies NonContigInfo (Dims, Offsets, Counts, Strides).
/// \param Info Supplies PointersArray and NumberOfPtrs; the affected slots of
/// PointersArray are overwritten.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in the order the fields were added.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  // I indexes Dims and the pointers array; L indexes Offsets/Counts/Strides and
  // only advances for entries that are not skipped.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Descriptor II is filled from the source dimension counted from the
      // end, i.e. the descriptors are stored in reverse order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CGF Function being emitted; temporaries and stores are created
/// through it.
/// \param CombinedInfo Base pointers, pointers, sizes, map types and mappers
/// to materialize. Entry I of each array is emitted into slot I of the
/// corresponding offload array.
/// \param Info Cleared first, then receives NumberOfPtrs and the generated
/// BasePointersArray, PointersArray, MappersArray, SizesArray, MapTypesArray
/// and, when needed, MapTypesArrayEnd. HasMapper is set if any entry has a
/// user-defined mapper.
/// \param IsNonContiguous If true, entries flagged OMP_MAP_NON_CONTIG use
/// their NonContigInfo.Dims value as the constant size, and the dimension
/// descriptors are emitted when NonContigInfo.Offsets is not empty.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                     CGOpenMPRuntime::TargetDataInfo &Info,
                     bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // The base pointer, pointer and mapper arrays are always temporaries that
    // are filled element by element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For a non-contiguous entry the size slot carries the value of
        // NonContigInfo.Dims[I] instead of CombinedInfo.Sizes[I].
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is edited in place; it is only reused for the end-of-region
      // array created below.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Info.MapTypesArrayEnd is only assigned when at least one entry
      // actually differs from the begin-of-region map types.
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store base pointer, pointer, (runtime) size and mapper of every entry
    // into slot I of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Retype the slot so the value can be stored without casting it.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address for base pointers that have an associated
      // device pointer declaration.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // NOTE(review): the constant-size path above substitutes
      // NonContigInfo.Dims[I] for OMP_MAP_NON_CONTIG entries, while this
      // runtime-size path always stores CombinedInfo.Sizes[I]. Confirm whether
      // non-contiguous entries can reach this path.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      // Entries without a user-defined mapper get a null pointer.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The dimension descriptors are only needed when non-contiguous handling
  // was requested, offsets were recorded, and something is mapped at all.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Whether the arguments are emitted for the call that ends the region.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each of the five output references receives either a pointer derived from
/// the matching array in \a Info or, when Info.NumberOfPtrs is zero, a null
/// pointer constant. The mappers argument is also null when Info.HasMapper is
/// false.
9194 static void emitOffloadingArraysArgument( 9195 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9196 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9197 llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, 9198 CGOpenMPRuntime::TargetDataInfo &Info, 9199 const ArgumentsOptions &Options = ArgumentsOptions()) { 9200 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9201 "expected region end call to runtime only when end call is separate"); 9202 CodeGenModule &CGM = CGF.CGM; 9203 if (Info.NumberOfPtrs) { 9204 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9205 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9206 Info.BasePointersArray, 9207 /*Idx0=*/0, /*Idx1=*/0); 9208 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9209 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9210 Info.PointersArray, 9211 /*Idx0=*/0, 9212 /*Idx1=*/0); 9213 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9214 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9215 /*Idx0=*/0, /*Idx1=*/0); 9216 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9217 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9218 Options.ForEndCall && Info.MapTypesArrayEnd ? 
Info.MapTypesArrayEnd 9219 : Info.MapTypesArray, 9220 /*Idx0=*/0, 9221 /*Idx1=*/0); 9222 // If there is no user-defined mapper, set the mapper array to nullptr to 9223 // avoid an unnecessary data privatization 9224 if (!Info.HasMapper) 9225 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9226 else 9227 MappersArrayArg = 9228 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9229 } else { 9230 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9231 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9232 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9233 MapTypesArrayArg = 9234 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9235 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9236 } 9237 } 9238 9239 /// Check for inner distribute directive. 9240 static const OMPExecutableDirective * 9241 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9242 const auto *CS = D.getInnermostCapturedStmt(); 9243 const auto *Body = 9244 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9245 const Stmt *ChildStmt = 9246 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9247 9248 if (const auto *NestedDir = 9249 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9250 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9251 switch (D.getDirectiveKind()) { 9252 case OMPD_target: 9253 if (isOpenMPDistributeDirective(DKind)) 9254 return NestedDir; 9255 if (DKind == OMPD_teams) { 9256 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9257 /*IgnoreCaptured=*/true); 9258 if (!Body) 9259 return nullptr; 9260 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9261 if (const auto *NND = 9262 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9263 DKind = NND->getDirectiveKind(); 9264 if (isOpenMPDistributeDirective(DKind)) 9265 return NND; 9266 } 9267 } 9268 
return nullptr; 9269 case OMPD_target_teams: 9270 if (isOpenMPDistributeDirective(DKind)) 9271 return NestedDir; 9272 return nullptr; 9273 case OMPD_target_parallel: 9274 case OMPD_target_simd: 9275 case OMPD_target_parallel_for: 9276 case OMPD_target_parallel_for_simd: 9277 return nullptr; 9278 case OMPD_target_teams_distribute: 9279 case OMPD_target_teams_distribute_simd: 9280 case OMPD_target_teams_distribute_parallel_for: 9281 case OMPD_target_teams_distribute_parallel_for_simd: 9282 case OMPD_parallel: 9283 case OMPD_for: 9284 case OMPD_parallel_for: 9285 case OMPD_parallel_master: 9286 case OMPD_parallel_sections: 9287 case OMPD_for_simd: 9288 case OMPD_parallel_for_simd: 9289 case OMPD_cancel: 9290 case OMPD_cancellation_point: 9291 case OMPD_ordered: 9292 case OMPD_threadprivate: 9293 case OMPD_allocate: 9294 case OMPD_task: 9295 case OMPD_simd: 9296 case OMPD_sections: 9297 case OMPD_section: 9298 case OMPD_single: 9299 case OMPD_master: 9300 case OMPD_critical: 9301 case OMPD_taskyield: 9302 case OMPD_barrier: 9303 case OMPD_taskwait: 9304 case OMPD_taskgroup: 9305 case OMPD_atomic: 9306 case OMPD_flush: 9307 case OMPD_depobj: 9308 case OMPD_scan: 9309 case OMPD_teams: 9310 case OMPD_target_data: 9311 case OMPD_target_exit_data: 9312 case OMPD_target_enter_data: 9313 case OMPD_distribute: 9314 case OMPD_distribute_simd: 9315 case OMPD_distribute_parallel_for: 9316 case OMPD_distribute_parallel_for_simd: 9317 case OMPD_teams_distribute: 9318 case OMPD_teams_distribute_simd: 9319 case OMPD_teams_distribute_parallel_for: 9320 case OMPD_teams_distribute_parallel_for_simd: 9321 case OMPD_target_update: 9322 case OMPD_declare_simd: 9323 case OMPD_declare_variant: 9324 case OMPD_begin_declare_variant: 9325 case OMPD_end_declare_variant: 9326 case OMPD_declare_target: 9327 case OMPD_end_declare_target: 9328 case OMPD_declare_reduction: 9329 case OMPD_declare_mapper: 9330 case OMPD_taskloop: 9331 case OMPD_taskloop_simd: 9332 case OMPD_master_taskloop: 9333 case 
OMPD_master_taskloop_simd: 9334 case OMPD_parallel_master_taskloop: 9335 case OMPD_parallel_master_taskloop_simd: 9336 case OMPD_requires: 9337 case OMPD_unknown: 9338 default: 9339 llvm_unreachable("Unexpected directive."); 9340 } 9341 } 9342 9343 return nullptr; 9344 } 9345 9346 /// Emit the user-defined mapper function. The code generation follows the 9347 /// pattern in the example below. 9348 /// \code 9349 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9350 /// void *base, void *begin, 9351 /// int64_t size, int64_t type) { 9352 /// // Allocate space for an array section first. 9353 /// if (size > 1 && !maptype.IsDelete) 9354 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9355 /// size*sizeof(Ty), clearToFrom(type)); 9356 /// // Map members. 9357 /// for (unsigned i = 0; i < size; i++) { 9358 /// // For each component specified by this mapper: 9359 /// for (auto c : all_components) { 9360 /// if (c.hasMapper()) 9361 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9362 /// c.arg_type); 9363 /// else 9364 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9365 /// c.arg_begin, c.arg_size, c.arg_type); 9366 /// } 9367 /// } 9368 /// // Delete the array section. 
9369 /// if (size > 1 && maptype.IsDelete) 9370 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9371 /// size*sizeof(Ty), clearToFrom(type)); 9372 /// } 9373 /// \endcode 9374 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9375 CodeGenFunction *CGF) { 9376 if (UDMMap.count(D) > 0) 9377 return; 9378 ASTContext &C = CGM.getContext(); 9379 QualType Ty = D->getType(); 9380 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9381 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9382 auto *MapperVarDecl = 9383 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9384 SourceLocation Loc = D->getLocation(); 9385 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9386 9387 // Prepare mapper function arguments and attributes. 9388 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9389 C.VoidPtrTy, ImplicitParamDecl::Other); 9390 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9391 ImplicitParamDecl::Other); 9392 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9393 C.VoidPtrTy, ImplicitParamDecl::Other); 9394 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9395 ImplicitParamDecl::Other); 9396 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9397 ImplicitParamDecl::Other); 9398 FunctionArgList Args; 9399 Args.push_back(&HandleArg); 9400 Args.push_back(&BaseArg); 9401 Args.push_back(&BeginArg); 9402 Args.push_back(&SizeArg); 9403 Args.push_back(&TypeArg); 9404 const CGFunctionInfo &FnInfo = 9405 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9406 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9407 SmallString<64> TyStr; 9408 llvm::raw_svector_ostream Out(TyStr); 9409 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9410 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9411 auto *Fn = 
llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9412 Name, &CGM.getModule()); 9413 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9414 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9415 // Start the mapper function code generation. 9416 CodeGenFunction MapperCGF(CGM); 9417 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9418 // Compute the starting and end addreses of array elements. 9419 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9420 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9421 C.getPointerType(Int64Ty), Loc); 9422 // Convert the size in bytes into the number of array elements. 9423 Size = MapperCGF.Builder.CreateExactUDiv( 9424 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9425 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9426 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9427 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9428 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9429 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9430 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9431 C.getPointerType(Int64Ty), Loc); 9432 // Prepare common arguments for array initiation and deletion. 9433 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9434 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9435 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9436 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9437 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9438 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9439 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9440 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9441 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9442 9443 // Emit array initiation if this is an array section and \p MapType indicates 9444 // that memory allocation is required. 
9445 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9446 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9447 ElementSize, HeadBB, /*IsInit=*/true); 9448 9449 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9450 9451 // Emit the loop header block. 9452 MapperCGF.EmitBlock(HeadBB); 9453 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9454 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9455 // Evaluate whether the initial condition is satisfied. 9456 llvm::Value *IsEmpty = 9457 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9458 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9459 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9460 9461 // Emit the loop body block. 9462 MapperCGF.EmitBlock(BodyBB); 9463 llvm::BasicBlock *LastBB = BodyBB; 9464 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9465 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9466 PtrPHI->addIncoming(PtrBegin, EntryBB); 9467 Address PtrCurrent = 9468 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9469 .getAlignment() 9470 .alignmentOfArrayElement(ElementSize)); 9471 // Privatize the declared variable of mapper to be the current array element. 9472 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9473 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9474 return MapperCGF 9475 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9476 .getAddress(MapperCGF); 9477 }); 9478 (void)Scope.Privatize(); 9479 9480 // Get map clause information. Fill up the arrays with all mapped variables. 9481 MappableExprsHandler::MapCombinedInfoTy Info; 9482 MappableExprsHandler MEHandler(*D, MapperCGF); 9483 MEHandler.generateAllInfoForMapper(Info); 9484 9485 // Call the runtime API __tgt_mapper_num_components to get the number of 9486 // pre-existing components. 
9487 llvm::Value *OffloadingArgs[] = {Handle}; 9488 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9489 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9490 OMPRTL___tgt_mapper_num_components), 9491 OffloadingArgs); 9492 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9493 PreviousSize, 9494 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9495 9496 // Fill up the runtime mapper handle for all components. 9497 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9498 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9499 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9500 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9501 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9502 llvm::Value *CurSizeArg = Info.Sizes[I]; 9503 9504 // Extract the MEMBER_OF field from the map type. 9505 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9506 MapperCGF.EmitBlock(MemberBB); 9507 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9508 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9509 OriMapType, 9510 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9511 llvm::BasicBlock *MemberCombineBB = 9512 MapperCGF.createBasicBlock("omp.member.combine"); 9513 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9514 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9515 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9516 // Add the number of pre-existing components to the MEMBER_OF field if it 9517 // is valid. 9518 MapperCGF.EmitBlock(MemberCombineBB); 9519 llvm::Value *CombinedMember = 9520 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9521 // Do nothing if it is not a member of previous components. 
9522 MapperCGF.EmitBlock(TypeBB); 9523 llvm::PHINode *MemberMapType = 9524 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9525 MemberMapType->addIncoming(OriMapType, MemberBB); 9526 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9527 9528 // Combine the map type inherited from user-defined mapper with that 9529 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9530 // bits of the \a MapType, which is the input argument of the mapper 9531 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9532 // bits of MemberMapType. 9533 // [OpenMP 5.0], 1.2.6. map-type decay. 9534 // | alloc | to | from | tofrom | release | delete 9535 // ---------------------------------------------------------- 9536 // alloc | alloc | alloc | alloc | alloc | release | delete 9537 // to | alloc | to | alloc | to | release | delete 9538 // from | alloc | alloc | from | from | release | delete 9539 // tofrom | alloc | to | from | tofrom | release | delete 9540 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9541 MapType, 9542 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9543 MappableExprsHandler::OMP_MAP_FROM)); 9544 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9545 llvm::BasicBlock *AllocElseBB = 9546 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9547 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9548 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9549 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9550 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9551 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9552 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9553 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
9554 MapperCGF.EmitBlock(AllocBB); 9555 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9556 MemberMapType, 9557 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9558 MappableExprsHandler::OMP_MAP_FROM))); 9559 MapperCGF.Builder.CreateBr(EndBB); 9560 MapperCGF.EmitBlock(AllocElseBB); 9561 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9562 LeftToFrom, 9563 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9564 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9565 // In case of to, clear OMP_MAP_FROM. 9566 MapperCGF.EmitBlock(ToBB); 9567 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9568 MemberMapType, 9569 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9570 MapperCGF.Builder.CreateBr(EndBB); 9571 MapperCGF.EmitBlock(ToElseBB); 9572 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9573 LeftToFrom, 9574 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9575 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9576 // In case of from, clear OMP_MAP_TO. 9577 MapperCGF.EmitBlock(FromBB); 9578 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9579 MemberMapType, 9580 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9581 // In case of tofrom, do nothing. 9582 MapperCGF.EmitBlock(EndBB); 9583 LastBB = EndBB; 9584 llvm::PHINode *CurMapType = 9585 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9586 CurMapType->addIncoming(AllocMapType, AllocBB); 9587 CurMapType->addIncoming(ToMapType, ToBB); 9588 CurMapType->addIncoming(FromMapType, FromBB); 9589 CurMapType->addIncoming(MemberMapType, ToElseBB); 9590 9591 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9592 CurSizeArg, CurMapType}; 9593 if (Info.Mappers[I]) { 9594 // Call the corresponding mapper function. 
9595 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9596 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9597 assert(MapperFunc && "Expect a valid mapper function is available."); 9598 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9599 } else { 9600 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9601 // data structure. 9602 MapperCGF.EmitRuntimeCall( 9603 OMPBuilder.getOrCreateRuntimeFunction( 9604 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9605 OffloadingArgs); 9606 } 9607 } 9608 9609 // Update the pointer to point to the next element that needs to be mapped, 9610 // and check whether we have mapped all elements. 9611 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9612 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9613 PtrPHI->addIncoming(PtrNext, LastBB); 9614 llvm::Value *IsDone = 9615 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9616 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9617 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9618 9619 MapperCGF.EmitBlock(ExitBB); 9620 // Emit array deletion if this is an array section and \p MapType indicates 9621 // that deletion is required. 9622 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9623 ElementSize, DoneBB, /*IsInit=*/false); 9624 9625 // Emit the function exit block. 9626 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9627 MapperCGF.FinishFunction(); 9628 UDMMap.try_emplace(D, Fn); 9629 if (CGF) { 9630 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9631 Decls.second.push_back(D); 9632 } 9633 } 9634 9635 /// Emit the array initialization or deletion portion for user-defined mapper 9636 /// code generation. First, it evaluates whether an array section is mapped and 9637 /// whether the \a MapType instructs to delete this section. 
If \a IsInit is 9638 /// true, and \a MapType indicates to not delete this array, array 9639 /// initialization code is generated. If \a IsInit is false, and \a MapType 9640 /// indicates to not this array, array deletion code is generated. 9641 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9642 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9643 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9644 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9645 StringRef Prefix = IsInit ? ".init" : ".del"; 9646 9647 // Evaluate if this is an array section. 9648 llvm::BasicBlock *IsDeleteBB = 9649 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9650 llvm::BasicBlock *BodyBB = 9651 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9652 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9653 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9654 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9655 9656 // Evaluate if we are going to delete this section. 9657 MapperCGF.EmitBlock(IsDeleteBB); 9658 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9659 MapType, 9660 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9661 llvm::Value *DeleteCond; 9662 if (IsInit) { 9663 DeleteCond = MapperCGF.Builder.CreateIsNull( 9664 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9665 } else { 9666 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9667 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9668 } 9669 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9670 9671 MapperCGF.EmitBlock(BodyBB); 9672 // Get the array size by multiplying element size and element number (i.e., \p 9673 // Size). 
9674 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9675 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9676 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9677 // memory allocation/deletion purpose only. 9678 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9679 MapType, 9680 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9681 MappableExprsHandler::OMP_MAP_FROM))); 9682 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9683 // data structure. 9684 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9685 MapperCGF.EmitRuntimeCall( 9686 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9687 OMPRTL___tgt_push_mapper_component), 9688 OffloadingArgs); 9689 } 9690 9691 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9692 const OMPDeclareMapperDecl *D) { 9693 auto I = UDMMap.find(D); 9694 if (I != UDMMap.end()) 9695 return I->second; 9696 emitUserDefinedMapper(D); 9697 return UDMMap.lookup(D); 9698 } 9699 9700 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9701 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9702 llvm::Value *DeviceID, 9703 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9704 const OMPLoopDirective &D)> 9705 SizeEmitter) { 9706 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9707 const OMPExecutableDirective *TD = &D; 9708 // Get nested teams distribute kind directive, if any. 
9709 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9710 TD = getNestedDistributeDirective(CGM.getContext(), D); 9711 if (!TD) 9712 return; 9713 const auto *LD = cast<OMPLoopDirective>(TD); 9714 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9715 PrePostActionTy &) { 9716 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9717 llvm::Value *Args[] = {DeviceID, NumIterations}; 9718 CGF.EmitRuntimeCall( 9719 OMPBuilder.getOrCreateRuntimeFunction( 9720 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9721 Args); 9722 } 9723 }; 9724 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9725 } 9726 9727 void CGOpenMPRuntime::emitTargetCall( 9728 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9729 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9730 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9731 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9732 const OMPLoopDirective &D)> 9733 SizeEmitter) { 9734 if (!CGF.HaveInsertPoint()) 9735 return; 9736 9737 assert(OutlinedFn && "Invalid outlined function!"); 9738 9739 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 9740 D.hasClausesOfKind<OMPNowaitClause>(); 9741 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9742 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9743 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9744 PrePostActionTy &) { 9745 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9746 }; 9747 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9748 9749 CodeGenFunction::OMPTargetDataInfo InputInfo; 9750 llvm::Value *MapTypesArray = nullptr; 9751 // Fill up the pointer arrays and transfer execution to the device. 
9752 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9753 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9754 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9755 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9756 // Reverse offloading is not supported, so just execute on the host. 9757 if (RequiresOuterTask) { 9758 CapturedVars.clear(); 9759 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9760 } 9761 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9762 return; 9763 } 9764 9765 // On top of the arrays that were filled up, the target offloading call 9766 // takes as arguments the device id as well as the host pointer. The host 9767 // pointer is used by the runtime library to identify the current target 9768 // region, so it only has to be unique and not necessarily point to 9769 // anything. It could be the pointer to the outlined function that 9770 // implements the target region, but we aren't using that so that the 9771 // compiler doesn't need to keep that, and could therefore inline the host 9772 // function if proven worthwhile during optimization. 9773 9774 // From this point on, we need to have an ID of the target region defined. 9775 assert(OutlinedFnID && "Invalid outlined function ID!"); 9776 9777 // Emit device ID if any. 9778 llvm::Value *DeviceID; 9779 if (Device.getPointer()) { 9780 assert((Device.getInt() == OMPC_DEVICE_unknown || 9781 Device.getInt() == OMPC_DEVICE_device_num) && 9782 "Expected device_num modifier."); 9783 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9784 DeviceID = 9785 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9786 } else { 9787 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9788 } 9789 9790 // Emit the number of elements in the offloading arrays. 9791 llvm::Value *PointerNum = 9792 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9793 9794 // Return value of the runtime offloading call. 
9795 llvm::Value *Return; 9796 9797 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9798 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9799 9800 // Emit tripcount for the target loop-based directive. 9801 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9802 9803 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9804 // The target region is an outlined function launched by the runtime 9805 // via calls __tgt_target() or __tgt_target_teams(). 9806 // 9807 // __tgt_target() launches a target region with one team and one thread, 9808 // executing a serial region. This master thread may in turn launch 9809 // more threads within its team upon encountering a parallel region, 9810 // however, no additional teams can be launched on the device. 9811 // 9812 // __tgt_target_teams() launches a target region with one or more teams, 9813 // each with one or more threads. This call is required for target 9814 // constructs such as: 9815 // 'target teams' 9816 // 'target' / 'teams' 9817 // 'target teams distribute parallel for' 9818 // 'target parallel' 9819 // and so on. 9820 // 9821 // Note that on the host and CPU targets, the runtime implementation of 9822 // these calls simply call the outlined function without forking threads. 9823 // The outlined functions themselves have runtime calls to 9824 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9825 // the compiler in emitTeamsCall() and emitParallelCall(). 9826 // 9827 // In contrast, on the NVPTX target, the implementation of 9828 // __tgt_target_teams() launches a GPU kernel with the requested number 9829 // of teams and threads so no additional calls to the runtime are required. 9830 if (NumTeams) { 9831 // If we have NumTeams defined this means that we have an enclosed teams 9832 // region. Therefore we also expect to have NumThreads defined. 
These two 9833 // values should be defined in the presence of a teams directive, 9834 // regardless of having any clauses associated. If the user is using teams 9835 // but no clauses, these two values will be the default that should be 9836 // passed to the runtime library - a 32-bit integer with the value zero. 9837 assert(NumThreads && "Thread limit expression should be available along " 9838 "with number of teams."); 9839 llvm::Value *OffloadingArgs[] = {DeviceID, 9840 OutlinedFnID, 9841 PointerNum, 9842 InputInfo.BasePointersArray.getPointer(), 9843 InputInfo.PointersArray.getPointer(), 9844 InputInfo.SizesArray.getPointer(), 9845 MapTypesArray, 9846 InputInfo.MappersArray.getPointer(), 9847 NumTeams, 9848 NumThreads}; 9849 Return = CGF.EmitRuntimeCall( 9850 OMPBuilder.getOrCreateRuntimeFunction( 9851 CGM.getModule(), HasNowait 9852 ? OMPRTL___tgt_target_teams_nowait_mapper 9853 : OMPRTL___tgt_target_teams_mapper), 9854 OffloadingArgs); 9855 } else { 9856 llvm::Value *OffloadingArgs[] = {DeviceID, 9857 OutlinedFnID, 9858 PointerNum, 9859 InputInfo.BasePointersArray.getPointer(), 9860 InputInfo.PointersArray.getPointer(), 9861 InputInfo.SizesArray.getPointer(), 9862 MapTypesArray, 9863 InputInfo.MappersArray.getPointer()}; 9864 Return = CGF.EmitRuntimeCall( 9865 OMPBuilder.getOrCreateRuntimeFunction( 9866 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 9867 : OMPRTL___tgt_target_mapper), 9868 OffloadingArgs); 9869 } 9870 9871 // Check the error code and execute the host version if required. 
9872 llvm::BasicBlock *OffloadFailedBlock = 9873 CGF.createBasicBlock("omp_offload.failed"); 9874 llvm::BasicBlock *OffloadContBlock = 9875 CGF.createBasicBlock("omp_offload.cont"); 9876 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9877 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9878 9879 CGF.EmitBlock(OffloadFailedBlock); 9880 if (RequiresOuterTask) { 9881 CapturedVars.clear(); 9882 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9883 } 9884 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9885 CGF.EmitBranch(OffloadContBlock); 9886 9887 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9888 }; 9889 9890 // Notify that the host version must be executed. 9891 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9892 RequiresOuterTask](CodeGenFunction &CGF, 9893 PrePostActionTy &) { 9894 if (RequiresOuterTask) { 9895 CapturedVars.clear(); 9896 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9897 } 9898 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9899 }; 9900 9901 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9902 &CapturedVars, RequiresOuterTask, 9903 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9904 // Fill up the arrays with all the captured variables. 9905 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9906 9907 // Get mappable expression information. 
9908 MappableExprsHandler MEHandler(D, CGF); 9909 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9910 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9911 9912 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9913 auto CV = CapturedVars.begin(); 9914 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9915 CE = CS.capture_end(); 9916 CI != CE; ++CI, ++RI, ++CV) { 9917 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9918 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9919 9920 // VLA sizes are passed to the outlined region by copy and do not have map 9921 // information associated. 9922 if (CI->capturesVariableArrayType()) { 9923 CurInfo.BasePointers.push_back(*CV); 9924 CurInfo.Pointers.push_back(*CV); 9925 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9926 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9927 // Copy to the device as an argument. No need to retrieve it. 9928 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9929 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9930 MappableExprsHandler::OMP_MAP_IMPLICIT); 9931 CurInfo.Mappers.push_back(nullptr); 9932 } else { 9933 // If we have any information in the map clause, we use it, otherwise we 9934 // just do a default mapping. 9935 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 9936 if (!CI->capturesThis()) 9937 MappedVarSet.insert(CI->getCapturedVar()); 9938 else 9939 MappedVarSet.insert(nullptr); 9940 if (CurInfo.BasePointers.empty()) 9941 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 9942 // Generate correct mapping for variables captured by reference in 9943 // lambdas. 9944 if (CI->capturesVariable()) 9945 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 9946 CurInfo, LambdaPointers); 9947 } 9948 // We expect to have at least an element of information for this capture. 
9949 assert(!CurInfo.BasePointers.empty() && 9950 "Non-existing map pointer for capture!"); 9951 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9952 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9953 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9954 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9955 "Inconsistent map information sizes!"); 9956 9957 // If there is an entry in PartialStruct it means we have a struct with 9958 // individual members mapped. Emit an extra combined entry. 9959 if (PartialStruct.Base.isValid()) 9960 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 9961 9962 // We need to append the results of this capture to what we already have. 9963 CombinedInfo.append(CurInfo); 9964 } 9965 // Adjust MEMBER_OF flags for the lambdas captures. 9966 MEHandler.adjustMemberOfForLambdaCaptures( 9967 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 9968 CombinedInfo.Types); 9969 // Map any list items in a map clause that were not captures because they 9970 // weren't referenced within the construct. 9971 MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true, 9972 MappedVarSet); 9973 9974 TargetDataInfo Info; 9975 // Fill up the arrays and create the arguments. 
9976 emitOffloadingArrays(CGF, CombinedInfo, Info); 9977 emitOffloadingArraysArgument( 9978 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 9979 Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndTask=*/false}); 9980 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9981 InputInfo.BasePointersArray = 9982 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9983 InputInfo.PointersArray = 9984 Address(Info.PointersArray, CGM.getPointerAlign()); 9985 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9986 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 9987 MapTypesArray = Info.MapTypesArray; 9988 if (RequiresOuterTask) 9989 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9990 else 9991 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9992 }; 9993 9994 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9995 CodeGenFunction &CGF, PrePostActionTy &) { 9996 if (RequiresOuterTask) { 9997 CodeGenFunction::OMPTargetDataInfo InputInfo; 9998 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9999 } else { 10000 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10001 } 10002 }; 10003 10004 // If we have a target function ID it means that we need to support 10005 // offloading, otherwise, just execute on the host. We need to execute on host 10006 // regardless of the conditional in the if clause if, e.g., the user do not 10007 // specify target triples. 
10008 if (OutlinedFnID) { 10009 if (IfCond) { 10010 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10011 } else { 10012 RegionCodeGenTy ThenRCG(TargetThenGen); 10013 ThenRCG(CGF); 10014 } 10015 } else { 10016 RegionCodeGenTy ElseRCG(TargetElseGen); 10017 ElseRCG(CGF); 10018 } 10019 } 10020 10021 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10022 StringRef ParentName) { 10023 if (!S) 10024 return; 10025 10026 // Codegen OMP target directives that offload compute to the device. 10027 bool RequiresDeviceCodegen = 10028 isa<OMPExecutableDirective>(S) && 10029 isOpenMPTargetExecutionDirective( 10030 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10031 10032 if (RequiresDeviceCodegen) { 10033 const auto &E = *cast<OMPExecutableDirective>(S); 10034 unsigned DeviceID; 10035 unsigned FileID; 10036 unsigned Line; 10037 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10038 FileID, Line); 10039 10040 // Is this a target region that should not be emitted as an entry point? If 10041 // so just signal we are done with this target region. 
10042 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10043 ParentName, Line)) 10044 return; 10045 10046 switch (E.getDirectiveKind()) { 10047 case OMPD_target: 10048 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10049 cast<OMPTargetDirective>(E)); 10050 break; 10051 case OMPD_target_parallel: 10052 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10053 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10054 break; 10055 case OMPD_target_teams: 10056 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10057 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10058 break; 10059 case OMPD_target_teams_distribute: 10060 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10061 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10062 break; 10063 case OMPD_target_teams_distribute_simd: 10064 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10065 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10066 break; 10067 case OMPD_target_parallel_for: 10068 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10069 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10070 break; 10071 case OMPD_target_parallel_for_simd: 10072 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10073 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10074 break; 10075 case OMPD_target_simd: 10076 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10077 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10078 break; 10079 case OMPD_target_teams_distribute_parallel_for: 10080 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10081 CGM, ParentName, 10082 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10083 break; 10084 case OMPD_target_teams_distribute_parallel_for_simd: 10085 CodeGenFunction:: 10086 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10087 CGM, ParentName, 10088 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10089 break; 10090 case OMPD_parallel: 10091 case OMPD_for: 10092 case OMPD_parallel_for: 10093 case OMPD_parallel_master: 10094 case OMPD_parallel_sections: 10095 case OMPD_for_simd: 10096 case OMPD_parallel_for_simd: 10097 case OMPD_cancel: 10098 case OMPD_cancellation_point: 10099 case OMPD_ordered: 10100 case OMPD_threadprivate: 10101 case OMPD_allocate: 10102 case OMPD_task: 10103 case OMPD_simd: 10104 case OMPD_sections: 10105 case OMPD_section: 10106 case OMPD_single: 10107 case OMPD_master: 10108 case OMPD_critical: 10109 case OMPD_taskyield: 10110 case OMPD_barrier: 10111 case OMPD_taskwait: 10112 case OMPD_taskgroup: 10113 case OMPD_atomic: 10114 case OMPD_flush: 10115 case OMPD_depobj: 10116 case OMPD_scan: 10117 case OMPD_teams: 10118 case OMPD_target_data: 10119 case OMPD_target_exit_data: 10120 case OMPD_target_enter_data: 10121 case OMPD_distribute: 10122 case OMPD_distribute_simd: 10123 case OMPD_distribute_parallel_for: 10124 case OMPD_distribute_parallel_for_simd: 10125 case OMPD_teams_distribute: 10126 case OMPD_teams_distribute_simd: 10127 case OMPD_teams_distribute_parallel_for: 10128 case OMPD_teams_distribute_parallel_for_simd: 10129 case OMPD_target_update: 10130 case OMPD_declare_simd: 10131 case OMPD_declare_variant: 10132 case OMPD_begin_declare_variant: 10133 case OMPD_end_declare_variant: 10134 case OMPD_declare_target: 10135 case OMPD_end_declare_target: 10136 case OMPD_declare_reduction: 10137 case OMPD_declare_mapper: 10138 case OMPD_taskloop: 10139 case OMPD_taskloop_simd: 10140 case OMPD_master_taskloop: 10141 case OMPD_master_taskloop_simd: 10142 case OMPD_parallel_master_taskloop: 10143 case OMPD_parallel_master_taskloop_simd: 10144 case OMPD_requires: 10145 case OMPD_unknown: 10146 default: 10147 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10148 } 10149 return; 10150 } 10151 10152 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 
10153 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10154 return; 10155 10156 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10157 return; 10158 } 10159 10160 // If this is a lambda function, look into its body. 10161 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10162 S = L->getBody(); 10163 10164 // Keep looking for target regions recursively. 10165 for (const Stmt *II : S->children()) 10166 scanForTargetRegionsFunctions(II, ParentName); 10167 } 10168 10169 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10170 // If emitting code for the host, we do not process FD here. Instead we do 10171 // the normal code generation. 10172 if (!CGM.getLangOpts().OpenMPIsDevice) { 10173 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10174 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10175 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10176 // Do not emit device_type(nohost) functions for the host. 10177 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10178 return true; 10179 } 10180 return false; 10181 } 10182 10183 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10184 // Try to detect target regions in the function. 10185 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10186 StringRef Name = CGM.getMangledName(GD); 10187 scanForTargetRegionsFunctions(FD->getBody(), Name); 10188 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10189 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10190 // Do not emit device_type(nohost) functions for the host. 10191 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10192 return true; 10193 } 10194 10195 // Do not to emit function if it is not marked as declare target. 
10196 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10197 AlreadyEmittedTargetDecls.count(VD) == 0; 10198 } 10199 10200 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10201 if (!CGM.getLangOpts().OpenMPIsDevice) 10202 return false; 10203 10204 // Check if there are Ctors/Dtors in this declaration and look for target 10205 // regions in it. We use the complete variant to produce the kernel name 10206 // mangling. 10207 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10208 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10209 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10210 StringRef ParentName = 10211 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10212 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10213 } 10214 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10215 StringRef ParentName = 10216 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10217 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10218 } 10219 } 10220 10221 // Do not to emit variable if it is not marked as declare target. 
10222 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10223 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10224 cast<VarDecl>(GD.getDecl())); 10225 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10226 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10227 HasRequiresUnifiedSharedMemory)) { 10228 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10229 return true; 10230 } 10231 return false; 10232 } 10233 10234 llvm::Constant * 10235 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10236 const VarDecl *VD) { 10237 assert(VD->getType().isConstant(CGM.getContext()) && 10238 "Expected constant variable."); 10239 StringRef VarName; 10240 llvm::Constant *Addr; 10241 llvm::GlobalValue::LinkageTypes Linkage; 10242 QualType Ty = VD->getType(); 10243 SmallString<128> Buffer; 10244 { 10245 unsigned DeviceID; 10246 unsigned FileID; 10247 unsigned Line; 10248 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10249 FileID, Line); 10250 llvm::raw_svector_ostream OS(Buffer); 10251 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10252 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10253 VarName = OS.str(); 10254 } 10255 Linkage = llvm::GlobalValue::InternalLinkage; 10256 Addr = 10257 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10258 getDefaultFirstprivateAddressSpace()); 10259 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10260 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10261 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10262 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10263 VarName, Addr, VarSize, 10264 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10265 return Addr; 10266 } 10267 10268 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10269 llvm::Constant *Addr) { 10270 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10271 
!CGM.getLangOpts().OpenMPIsDevice) 10272 return; 10273 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10274 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10275 if (!Res) { 10276 if (CGM.getLangOpts().OpenMPIsDevice) { 10277 // Register non-target variables being emitted in device code (debug info 10278 // may cause this). 10279 StringRef VarName = CGM.getMangledName(VD); 10280 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10281 } 10282 return; 10283 } 10284 // Register declare target variables. 10285 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10286 StringRef VarName; 10287 CharUnits VarSize; 10288 llvm::GlobalValue::LinkageTypes Linkage; 10289 10290 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10291 !HasRequiresUnifiedSharedMemory) { 10292 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10293 VarName = CGM.getMangledName(VD); 10294 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10295 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10296 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10297 } else { 10298 VarSize = CharUnits::Zero(); 10299 } 10300 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10301 // Temp solution to prevent optimizations of the internal variables. 
10302 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10303 std::string RefName = getName({VarName, "ref"}); 10304 if (!CGM.GetGlobalValue(RefName)) { 10305 llvm::Constant *AddrRef = 10306 getOrCreateInternalVariable(Addr->getType(), RefName); 10307 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10308 GVAddrRef->setConstant(/*Val=*/true); 10309 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10310 GVAddrRef->setInitializer(Addr); 10311 CGM.addCompilerUsedGlobal(GVAddrRef); 10312 } 10313 } 10314 } else { 10315 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10316 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10317 HasRequiresUnifiedSharedMemory)) && 10318 "Declare target attribute must link or to with unified memory."); 10319 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10320 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10321 else 10322 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10323 10324 if (CGM.getLangOpts().OpenMPIsDevice) { 10325 VarName = Addr->getName(); 10326 Addr = nullptr; 10327 } else { 10328 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10329 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10330 } 10331 VarSize = CGM.getPointerSize(); 10332 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10333 } 10334 10335 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10336 VarName, Addr, VarSize, Flags, Linkage); 10337 } 10338 10339 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10340 if (isa<FunctionDecl>(GD.getDecl()) || 10341 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10342 return emitTargetFunctions(GD); 10343 10344 return emitTargetGlobalVariable(GD); 10345 } 10346 10347 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10348 for (const VarDecl *VD : DeferredGlobalVariables) { 10349 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10350 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10351 if (!Res) 
10352 continue; 10353 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10354 !HasRequiresUnifiedSharedMemory) { 10355 CGM.EmitGlobal(VD); 10356 } else { 10357 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10358 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10359 HasRequiresUnifiedSharedMemory)) && 10360 "Expected link clause or to clause with unified memory."); 10361 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10362 } 10363 } 10364 } 10365 10366 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10367 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10368 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10369 " Expected target-based directive."); 10370 } 10371 10372 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10373 for (const OMPClause *Clause : D->clauselists()) { 10374 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10375 HasRequiresUnifiedSharedMemory = true; 10376 } else if (const auto *AC = 10377 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10378 switch (AC->getAtomicDefaultMemOrderKind()) { 10379 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10380 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10381 break; 10382 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10383 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10384 break; 10385 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10386 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10387 break; 10388 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10389 break; 10390 } 10391 } 10392 } 10393 } 10394 10395 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10396 return RequiresAtomicOrdering; 10397 } 10398 10399 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10400 LangAS &AS) { 10401 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10402 return false; 10403 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10404 
switch(A->getAllocatorType()) { 10405 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10406 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10407 // Not supported, fallback to the default mem space. 10408 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10409 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10410 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10411 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10412 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10413 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10414 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10415 AS = LangAS::Default; 10416 return true; 10417 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10418 llvm_unreachable("Expected predefined allocator for the variables with the " 10419 "static storage."); 10420 } 10421 return false; 10422 } 10423 10424 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10425 return HasRequiresUnifiedSharedMemory; 10426 } 10427 10428 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10429 CodeGenModule &CGM) 10430 : CGM(CGM) { 10431 if (CGM.getLangOpts().OpenMPIsDevice) { 10432 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10433 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10434 } 10435 } 10436 10437 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10438 if (CGM.getLangOpts().OpenMPIsDevice) 10439 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10440 } 10441 10442 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10443 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10444 return true; 10445 10446 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10447 // Do not to emit function if it is marked as declare target as it was already 10448 // emitted. 
10449 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10450 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10451 if (auto *F = dyn_cast_or_null<llvm::Function>( 10452 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10453 return !F->isDeclaration(); 10454 return false; 10455 } 10456 return true; 10457 } 10458 10459 return !AlreadyEmittedTargetDecls.insert(D).second; 10460 } 10461 10462 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10463 // If we don't have entries or if we are emitting code for the device, we 10464 // don't need to do anything. 10465 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10466 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10467 (OffloadEntriesInfoManager.empty() && 10468 !HasEmittedDeclareTargetRegion && 10469 !HasEmittedTargetRegion)) 10470 return nullptr; 10471 10472 // Create and register the function that handles the requires directives. 10473 ASTContext &C = CGM.getContext(); 10474 10475 llvm::Function *RequiresRegFn; 10476 { 10477 CodeGenFunction CGF(CGM); 10478 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10479 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10480 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10481 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10482 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10483 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10484 // TODO: check for other requires clauses. 10485 // The requires directive takes effect only when a target region is 10486 // present in the compilation unit. Otherwise it is ignored and not 10487 // passed to the runtime. This avoids the runtime from throwing an error 10488 // for mismatching requires clauses across compilation units that don't 10489 // contain at least 1 target region. 
10490 assert((HasEmittedTargetRegion || 10491 HasEmittedDeclareTargetRegion || 10492 !OffloadEntriesInfoManager.empty()) && 10493 "Target or declare target region expected."); 10494 if (HasRequiresUnifiedSharedMemory) 10495 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10496 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10497 CGM.getModule(), OMPRTL___tgt_register_requires), 10498 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10499 CGF.FinishFunction(); 10500 } 10501 return RequiresRegFn; 10502 } 10503 10504 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10505 const OMPExecutableDirective &D, 10506 SourceLocation Loc, 10507 llvm::Function *OutlinedFn, 10508 ArrayRef<llvm::Value *> CapturedVars) { 10509 if (!CGF.HaveInsertPoint()) 10510 return; 10511 10512 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10513 CodeGenFunction::RunCleanupsScope Scope(CGF); 10514 10515 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10516 llvm::Value *Args[] = { 10517 RTLoc, 10518 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10519 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10520 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10521 RealArgs.append(std::begin(Args), std::end(Args)); 10522 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10523 10524 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10525 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10526 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10527 } 10528 10529 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10530 const Expr *NumTeams, 10531 const Expr *ThreadLimit, 10532 SourceLocation Loc) { 10533 if (!CGF.HaveInsertPoint()) 10534 return; 10535 10536 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10537 10538 llvm::Value *NumTeamsVal = 10539 NumTeams 10540 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10541 CGF.CGM.Int32Ty, /* isSigned = */ true) 10542 : CGF.Builder.getInt32(0); 10543 10544 llvm::Value *ThreadLimitVal = 10545 ThreadLimit 10546 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10547 CGF.CGM.Int32Ty, /* isSigned = */ true) 10548 : CGF.Builder.getInt32(0); 10549 10550 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10551 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10552 ThreadLimitVal}; 10553 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10554 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10555 PushNumTeamsArgs); 10556 } 10557 10558 void CGOpenMPRuntime::emitTargetDataCalls( 10559 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10560 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10561 if (!CGF.HaveInsertPoint()) 10562 return; 10563 10564 // Action used to replace the default codegen action and turn privatization 10565 // off. 10566 PrePostActionTy NoPrivAction; 10567 10568 // Generate the code for the opening of the data environment. Capture all the 10569 // arguments of the runtime call by reference because they are used in the 10570 // closing of the region. 10571 auto &&BeginThenGen = [this, &D, Device, &Info, 10572 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10573 // Fill up the arrays with all the mapped variables. 10574 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10575 10576 // Get map clause information. 10577 MappableExprsHandler MEHandler(D, CGF); 10578 MEHandler.generateAllInfo(CombinedInfo); 10579 10580 // Fill up the arrays and create the arguments. 
10581 emitOffloadingArrays(CGF, CombinedInfo, Info, /*IsNonContiguous=*/true); 10582 10583 llvm::Value *BasePointersArrayArg = nullptr; 10584 llvm::Value *PointersArrayArg = nullptr; 10585 llvm::Value *SizesArrayArg = nullptr; 10586 llvm::Value *MapTypesArrayArg = nullptr; 10587 llvm::Value *MappersArrayArg = nullptr; 10588 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10589 SizesArrayArg, MapTypesArrayArg, 10590 MappersArrayArg, Info); 10591 10592 // Emit device ID if any. 10593 llvm::Value *DeviceID = nullptr; 10594 if (Device) { 10595 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10596 CGF.Int64Ty, /*isSigned=*/true); 10597 } else { 10598 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10599 } 10600 10601 // Emit the number of elements in the offloading arrays. 10602 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10603 10604 llvm::Value *OffloadingArgs[] = { 10605 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10606 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10607 CGF.EmitRuntimeCall( 10608 OMPBuilder.getOrCreateRuntimeFunction( 10609 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10610 OffloadingArgs); 10611 10612 // If device pointer privatization is required, emit the body of the region 10613 // here. It will have to be duplicated: with and without privatization. 10614 if (!Info.CaptureDeviceAddrMap.empty()) 10615 CodeGen(CGF); 10616 }; 10617 10618 // Generate code for the closing of the data region. 
10619 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10620 PrePostActionTy &) { 10621 assert(Info.isValid() && "Invalid data environment closing arguments."); 10622 10623 llvm::Value *BasePointersArrayArg = nullptr; 10624 llvm::Value *PointersArrayArg = nullptr; 10625 llvm::Value *SizesArrayArg = nullptr; 10626 llvm::Value *MapTypesArrayArg = nullptr; 10627 llvm::Value *MappersArrayArg = nullptr; 10628 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10629 SizesArrayArg, MapTypesArrayArg, 10630 MappersArrayArg, Info, {/*ForEndCall=*/true}); 10631 10632 // Emit device ID if any. 10633 llvm::Value *DeviceID = nullptr; 10634 if (Device) { 10635 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10636 CGF.Int64Ty, /*isSigned=*/true); 10637 } else { 10638 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10639 } 10640 10641 // Emit the number of elements in the offloading arrays. 10642 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10643 10644 llvm::Value *OffloadingArgs[] = { 10645 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10646 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10647 CGF.EmitRuntimeCall( 10648 OMPBuilder.getOrCreateRuntimeFunction( 10649 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10650 OffloadingArgs); 10651 }; 10652 10653 // If we need device pointer privatization, we need to emit the body of the 10654 // region with no privatization in the 'else' branch of the conditional. 10655 // Otherwise, we don't have to do anything. 10656 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10657 PrePostActionTy &) { 10658 if (!Info.CaptureDeviceAddrMap.empty()) { 10659 CodeGen.setAction(NoPrivAction); 10660 CodeGen(CGF); 10661 } 10662 }; 10663 10664 // We don't have to do anything to close the region if the if clause evaluates 10665 // to false. 
10666 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10667 10668 if (IfCond) { 10669 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10670 } else { 10671 RegionCodeGenTy RCG(BeginThenGen); 10672 RCG(CGF); 10673 } 10674 10675 // If we don't require privatization of device pointers, we emit the body in 10676 // between the runtime calls. This avoids duplicating the body code. 10677 if (Info.CaptureDeviceAddrMap.empty()) { 10678 CodeGen.setAction(NoPrivAction); 10679 CodeGen(CGF); 10680 } 10681 10682 if (IfCond) { 10683 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10684 } else { 10685 RegionCodeGenTy RCG(EndThenGen); 10686 RCG(CGF); 10687 } 10688 } 10689 10690 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10691 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10692 const Expr *Device) { 10693 if (!CGF.HaveInsertPoint()) 10694 return; 10695 10696 assert((isa<OMPTargetEnterDataDirective>(D) || 10697 isa<OMPTargetExitDataDirective>(D) || 10698 isa<OMPTargetUpdateDirective>(D)) && 10699 "Expecting either target enter, exit data, or update directives."); 10700 10701 CodeGenFunction::OMPTargetDataInfo InputInfo; 10702 llvm::Value *MapTypesArray = nullptr; 10703 // Generate the code for the opening of the data environment. 10704 auto &&ThenGen = [this, &D, Device, &InputInfo, 10705 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10706 // Emit device ID if any. 10707 llvm::Value *DeviceID = nullptr; 10708 if (Device) { 10709 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10710 CGF.Int64Ty, /*isSigned=*/true); 10711 } else { 10712 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10713 } 10714 10715 // Emit the number of elements in the offloading arrays. 
10716 llvm::Constant *PointerNum = 10717 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10718 10719 llvm::Value *OffloadingArgs[] = {DeviceID, 10720 PointerNum, 10721 InputInfo.BasePointersArray.getPointer(), 10722 InputInfo.PointersArray.getPointer(), 10723 InputInfo.SizesArray.getPointer(), 10724 MapTypesArray, 10725 InputInfo.MappersArray.getPointer()}; 10726 10727 // Select the right runtime function call for each standalone 10728 // directive. 10729 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10730 RuntimeFunction RTLFn; 10731 switch (D.getDirectiveKind()) { 10732 case OMPD_target_enter_data: 10733 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10734 : OMPRTL___tgt_target_data_begin_mapper; 10735 break; 10736 case OMPD_target_exit_data: 10737 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10738 : OMPRTL___tgt_target_data_end_mapper; 10739 break; 10740 case OMPD_target_update: 10741 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10742 : OMPRTL___tgt_target_data_update_mapper; 10743 break; 10744 case OMPD_parallel: 10745 case OMPD_for: 10746 case OMPD_parallel_for: 10747 case OMPD_parallel_master: 10748 case OMPD_parallel_sections: 10749 case OMPD_for_simd: 10750 case OMPD_parallel_for_simd: 10751 case OMPD_cancel: 10752 case OMPD_cancellation_point: 10753 case OMPD_ordered: 10754 case OMPD_threadprivate: 10755 case OMPD_allocate: 10756 case OMPD_task: 10757 case OMPD_simd: 10758 case OMPD_sections: 10759 case OMPD_section: 10760 case OMPD_single: 10761 case OMPD_master: 10762 case OMPD_critical: 10763 case OMPD_taskyield: 10764 case OMPD_barrier: 10765 case OMPD_taskwait: 10766 case OMPD_taskgroup: 10767 case OMPD_atomic: 10768 case OMPD_flush: 10769 case OMPD_depobj: 10770 case OMPD_scan: 10771 case OMPD_teams: 10772 case OMPD_target_data: 10773 case OMPD_distribute: 10774 case OMPD_distribute_simd: 10775 case OMPD_distribute_parallel_for: 10776 case OMPD_distribute_parallel_for_simd: 
10777 case OMPD_teams_distribute: 10778 case OMPD_teams_distribute_simd: 10779 case OMPD_teams_distribute_parallel_for: 10780 case OMPD_teams_distribute_parallel_for_simd: 10781 case OMPD_declare_simd: 10782 case OMPD_declare_variant: 10783 case OMPD_begin_declare_variant: 10784 case OMPD_end_declare_variant: 10785 case OMPD_declare_target: 10786 case OMPD_end_declare_target: 10787 case OMPD_declare_reduction: 10788 case OMPD_declare_mapper: 10789 case OMPD_taskloop: 10790 case OMPD_taskloop_simd: 10791 case OMPD_master_taskloop: 10792 case OMPD_master_taskloop_simd: 10793 case OMPD_parallel_master_taskloop: 10794 case OMPD_parallel_master_taskloop_simd: 10795 case OMPD_target: 10796 case OMPD_target_simd: 10797 case OMPD_target_teams_distribute: 10798 case OMPD_target_teams_distribute_simd: 10799 case OMPD_target_teams_distribute_parallel_for: 10800 case OMPD_target_teams_distribute_parallel_for_simd: 10801 case OMPD_target_teams: 10802 case OMPD_target_parallel: 10803 case OMPD_target_parallel_for: 10804 case OMPD_target_parallel_for_simd: 10805 case OMPD_requires: 10806 case OMPD_unknown: 10807 default: 10808 llvm_unreachable("Unexpected standalone target data directive."); 10809 break; 10810 } 10811 CGF.EmitRuntimeCall( 10812 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10813 OffloadingArgs); 10814 }; 10815 10816 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10817 CodeGenFunction &CGF, PrePostActionTy &) { 10818 // Fill up the arrays with all the mapped variables. 10819 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10820 10821 // Get map clause information. 10822 MappableExprsHandler MEHandler(D, CGF); 10823 MEHandler.generateAllInfo(CombinedInfo); 10824 10825 TargetDataInfo Info; 10826 // Fill up the arrays and create the arguments. 
10827 emitOffloadingArrays(CGF, CombinedInfo, Info, /*IsNonContiguous=*/true); 10828 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10829 D.hasClausesOfKind<OMPNowaitClause>(); 10830 emitOffloadingArraysArgument( 10831 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10832 Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndTask=*/false}); 10833 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10834 InputInfo.BasePointersArray = 10835 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10836 InputInfo.PointersArray = 10837 Address(Info.PointersArray, CGM.getPointerAlign()); 10838 InputInfo.SizesArray = 10839 Address(Info.SizesArray, CGM.getPointerAlign()); 10840 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10841 MapTypesArray = Info.MapTypesArray; 10842 if (RequiresOuterTask) 10843 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10844 else 10845 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10846 }; 10847 10848 if (IfCond) { 10849 emitIfClause(CGF, IfCond, TargetThenGen, 10850 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10851 } else { 10852 RegionCodeGenTy ThenRCG(TargetThenGen); 10853 ThenRCG(CGF); 10854 } 10855 } 10856 10857 namespace { 10858 /// Kind of parameter in a function with 'declare simd' directive. 10859 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10860 /// Attribute set of the parameter. 10861 struct ParamAttrTy { 10862 ParamKindTy Kind = Vector; 10863 llvm::APSInt StrideOrArg; 10864 llvm::APSInt Alignment; 10865 }; 10866 } // namespace 10867 10868 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10869 ArrayRef<ParamAttrTy> ParamAttrs) { 10870 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10871 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10872 // of that clause. The VLEN value must be power of 2. 
10873 // In other case the notion of the function`s "characteristic data type" (CDT) 10874 // is used to compute the vector length. 10875 // CDT is defined in the following order: 10876 // a) For non-void function, the CDT is the return type. 10877 // b) If the function has any non-uniform, non-linear parameters, then the 10878 // CDT is the type of the first such parameter. 10879 // c) If the CDT determined by a) or b) above is struct, union, or class 10880 // type which is pass-by-value (except for the type that maps to the 10881 // built-in complex data type), the characteristic data type is int. 10882 // d) If none of the above three cases is applicable, the CDT is int. 10883 // The VLEN is then determined based on the CDT and the size of vector 10884 // register of that ISA for which current vector version is generated. The 10885 // VLEN is computed using the formula below: 10886 // VLEN = sizeof(vector_register) / sizeof(CDT), 10887 // where vector register size specified in section 3.2.1 Registers and the 10888 // Stack Frame of original AMD64 ABI document. 
10889 QualType RetType = FD->getReturnType(); 10890 if (RetType.isNull()) 10891 return 0; 10892 ASTContext &C = FD->getASTContext(); 10893 QualType CDT; 10894 if (!RetType.isNull() && !RetType->isVoidType()) { 10895 CDT = RetType; 10896 } else { 10897 unsigned Offset = 0; 10898 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10899 if (ParamAttrs[Offset].Kind == Vector) 10900 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10901 ++Offset; 10902 } 10903 if (CDT.isNull()) { 10904 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10905 if (ParamAttrs[I + Offset].Kind == Vector) { 10906 CDT = FD->getParamDecl(I)->getType(); 10907 break; 10908 } 10909 } 10910 } 10911 } 10912 if (CDT.isNull()) 10913 CDT = C.IntTy; 10914 CDT = CDT->getCanonicalTypeUnqualified(); 10915 if (CDT->isRecordType() || CDT->isUnionType()) 10916 CDT = C.IntTy; 10917 return C.getTypeSize(CDT); 10918 } 10919 10920 static void 10921 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10922 const llvm::APSInt &VLENVal, 10923 ArrayRef<ParamAttrTy> ParamAttrs, 10924 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10925 struct ISADataTy { 10926 char ISA; 10927 unsigned VecRegSize; 10928 }; 10929 ISADataTy ISAData[] = { 10930 { 10931 'b', 128 10932 }, // SSE 10933 { 10934 'c', 256 10935 }, // AVX 10936 { 10937 'd', 256 10938 }, // AVX2 10939 { 10940 'e', 512 10941 }, // AVX512 10942 }; 10943 llvm::SmallVector<char, 2> Masked; 10944 switch (State) { 10945 case OMPDeclareSimdDeclAttr::BS_Undefined: 10946 Masked.push_back('N'); 10947 Masked.push_back('M'); 10948 break; 10949 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10950 Masked.push_back('N'); 10951 break; 10952 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10953 Masked.push_back('M'); 10954 break; 10955 } 10956 for (char Mask : Masked) { 10957 for (const ISADataTy &Data : ISAData) { 10958 SmallString<256> Buffer; 10959 llvm::raw_svector_ostream Out(Buffer); 10960 Out << "_ZGV" << Data.ISA << Mask; 10961 if (!VLENVal) 
{ 10962 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10963 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10964 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10965 } else { 10966 Out << VLENVal; 10967 } 10968 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10969 switch (ParamAttr.Kind){ 10970 case LinearWithVarStride: 10971 Out << 's' << ParamAttr.StrideOrArg; 10972 break; 10973 case Linear: 10974 Out << 'l'; 10975 if (ParamAttr.StrideOrArg != 1) 10976 Out << ParamAttr.StrideOrArg; 10977 break; 10978 case Uniform: 10979 Out << 'u'; 10980 break; 10981 case Vector: 10982 Out << 'v'; 10983 break; 10984 } 10985 if (!!ParamAttr.Alignment) 10986 Out << 'a' << ParamAttr.Alignment; 10987 } 10988 Out << '_' << Fn->getName(); 10989 Fn->addFnAttr(Out.str()); 10990 } 10991 } 10992 } 10993 10994 // This are the Functions that are needed to mangle the name of the 10995 // vector functions generated by the compiler, according to the rules 10996 // defined in the "Vector Function ABI specifications for AArch64", 10997 // available at 10998 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10999 11000 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11001 /// 11002 /// TODO: Need to implement the behavior for reference marked with a 11003 /// var or no linear modifiers (1.b in the section). For this, we 11004 /// need to extend ParamKindTy to support the linear modifiers. 
11005 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11006 QT = QT.getCanonicalType(); 11007 11008 if (QT->isVoidType()) 11009 return false; 11010 11011 if (Kind == ParamKindTy::Uniform) 11012 return false; 11013 11014 if (Kind == ParamKindTy::Linear) 11015 return false; 11016 11017 // TODO: Handle linear references with modifiers 11018 11019 if (Kind == ParamKindTy::LinearWithVarStride) 11020 return false; 11021 11022 return true; 11023 } 11024 11025 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11026 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11027 QT = QT.getCanonicalType(); 11028 unsigned Size = C.getTypeSize(QT); 11029 11030 // Only scalars and complex within 16 bytes wide set PVB to true. 11031 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11032 return false; 11033 11034 if (QT->isFloatingType()) 11035 return true; 11036 11037 if (QT->isIntegerType()) 11038 return true; 11039 11040 if (QT->isPointerType()) 11041 return true; 11042 11043 // TODO: Add support for complex types (section 3.1.2, item 2). 11044 11045 return false; 11046 } 11047 11048 /// Computes the lane size (LS) of a return type or of an input parameter, 11049 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11050 /// TODO: Add support for references, section 3.2.1, item 1. 11051 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11052 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11053 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11054 if (getAArch64PBV(PTy, C)) 11055 return C.getTypeSize(PTy); 11056 } 11057 if (getAArch64PBV(QT, C)) 11058 return C.getTypeSize(QT); 11059 11060 return C.getTypeSize(C.getUIntPtrType()); 11061 } 11062 11063 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11064 // signature of the scalar function, as defined in 3.2.2 of the 11065 // AAVFABI. 
11066 static std::tuple<unsigned, unsigned, bool> 11067 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11068 QualType RetType = FD->getReturnType().getCanonicalType(); 11069 11070 ASTContext &C = FD->getASTContext(); 11071 11072 bool OutputBecomesInput = false; 11073 11074 llvm::SmallVector<unsigned, 8> Sizes; 11075 if (!RetType->isVoidType()) { 11076 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11077 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11078 OutputBecomesInput = true; 11079 } 11080 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11081 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11082 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11083 } 11084 11085 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11086 // The LS of a function parameter / return value can only be a power 11087 // of 2, starting from 8 bits, up to 128. 11088 assert(std::all_of(Sizes.begin(), Sizes.end(), 11089 [](unsigned Size) { 11090 return Size == 8 || Size == 16 || Size == 32 || 11091 Size == 64 || Size == 128; 11092 }) && 11093 "Invalid size"); 11094 11095 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11096 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11097 OutputBecomesInput); 11098 } 11099 11100 /// Mangle the parameter part of the vector function name according to 11101 /// their OpenMP classification. The mangling function is defined in 11102 /// section 3.5 of the AAVFABI. 11103 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11104 SmallString<256> Buffer; 11105 llvm::raw_svector_ostream Out(Buffer); 11106 for (const auto &ParamAttr : ParamAttrs) { 11107 switch (ParamAttr.Kind) { 11108 case LinearWithVarStride: 11109 Out << "ls" << ParamAttr.StrideOrArg; 11110 break; 11111 case Linear: 11112 Out << 'l'; 11113 // Don't print the step value if it is not present or if it is 11114 // equal to 1. 
11115 if (ParamAttr.StrideOrArg != 1) 11116 Out << ParamAttr.StrideOrArg; 11117 break; 11118 case Uniform: 11119 Out << 'u'; 11120 break; 11121 case Vector: 11122 Out << 'v'; 11123 break; 11124 } 11125 11126 if (!!ParamAttr.Alignment) 11127 Out << 'a' << ParamAttr.Alignment; 11128 } 11129 11130 return std::string(Out.str()); 11131 } 11132 11133 // Function used to add the attribute. The parameter `VLEN` is 11134 // templated to allow the use of "x" when targeting scalable functions 11135 // for SVE. 11136 template <typename T> 11137 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11138 char ISA, StringRef ParSeq, 11139 StringRef MangledName, bool OutputBecomesInput, 11140 llvm::Function *Fn) { 11141 SmallString<256> Buffer; 11142 llvm::raw_svector_ostream Out(Buffer); 11143 Out << Prefix << ISA << LMask << VLEN; 11144 if (OutputBecomesInput) 11145 Out << "v"; 11146 Out << ParSeq << "_" << MangledName; 11147 Fn->addFnAttr(Out.str()); 11148 } 11149 11150 // Helper function to generate the Advanced SIMD names depending on 11151 // the value of the NDS when simdlen is not present. 
11152 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11153 StringRef Prefix, char ISA, 11154 StringRef ParSeq, StringRef MangledName, 11155 bool OutputBecomesInput, 11156 llvm::Function *Fn) { 11157 switch (NDS) { 11158 case 8: 11159 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11160 OutputBecomesInput, Fn); 11161 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11162 OutputBecomesInput, Fn); 11163 break; 11164 case 16: 11165 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11166 OutputBecomesInput, Fn); 11167 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11168 OutputBecomesInput, Fn); 11169 break; 11170 case 32: 11171 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11172 OutputBecomesInput, Fn); 11173 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11174 OutputBecomesInput, Fn); 11175 break; 11176 case 64: 11177 case 128: 11178 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11179 OutputBecomesInput, Fn); 11180 break; 11181 default: 11182 llvm_unreachable("Scalar type is too wide."); 11183 } 11184 } 11185 11186 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11187 static void emitAArch64DeclareSimdFunction( 11188 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11189 ArrayRef<ParamAttrTy> ParamAttrs, 11190 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11191 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11192 11193 // Get basic data for building the vector signature. 11194 const auto Data = getNDSWDS(FD, ParamAttrs); 11195 const unsigned NDS = std::get<0>(Data); 11196 const unsigned WDS = std::get<1>(Data); 11197 const bool OutputBecomesInput = std::get<2>(Data); 11198 11199 // Check the values provided via `simdlen` by the user. 11200 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11201 if (UserVLEN == 1) { 11202 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11203 DiagnosticsEngine::Warning, 11204 "The clause simdlen(1) has no effect when targeting aarch64."); 11205 CGM.getDiags().Report(SLoc, DiagID); 11206 return; 11207 } 11208 11209 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11210 // Advanced SIMD output. 11211 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11212 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11213 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11214 "power of 2 when targeting Advanced SIMD."); 11215 CGM.getDiags().Report(SLoc, DiagID); 11216 return; 11217 } 11218 11219 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11220 // limits. 11221 if (ISA == 's' && UserVLEN != 0) { 11222 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11223 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11224 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11225 "lanes in the architectural constraints " 11226 "for SVE (min is 128-bit, max is " 11227 "2048-bit, by steps of 128-bit)"); 11228 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11229 return; 11230 } 11231 } 11232 11233 // Sort out parameter sequence. 11234 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11235 StringRef Prefix = "_ZGV"; 11236 // Generate simdlen from user input (if any). 11237 if (UserVLEN) { 11238 if (ISA == 's') { 11239 // SVE generates only a masked function. 11240 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11241 OutputBecomesInput, Fn); 11242 } else { 11243 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11244 // Advanced SIMD generates one or two functions, depending on 11245 // the `[not]inbranch` clause. 
11246 switch (State) { 11247 case OMPDeclareSimdDeclAttr::BS_Undefined: 11248 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11249 OutputBecomesInput, Fn); 11250 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11251 OutputBecomesInput, Fn); 11252 break; 11253 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11254 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11255 OutputBecomesInput, Fn); 11256 break; 11257 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11258 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11259 OutputBecomesInput, Fn); 11260 break; 11261 } 11262 } 11263 } else { 11264 // If no user simdlen is provided, follow the AAVFABI rules for 11265 // generating the vector length. 11266 if (ISA == 's') { 11267 // SVE, section 3.4.1, item 1. 11268 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11269 OutputBecomesInput, Fn); 11270 } else { 11271 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11272 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11273 // two vector names depending on the use of the clause 11274 // `[not]inbranch`. 
11275 switch (State) { 11276 case OMPDeclareSimdDeclAttr::BS_Undefined: 11277 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11278 OutputBecomesInput, Fn); 11279 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11280 OutputBecomesInput, Fn); 11281 break; 11282 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11283 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11284 OutputBecomesInput, Fn); 11285 break; 11286 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11287 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11288 OutputBecomesInput, Fn); 11289 break; 11290 } 11291 } 11292 } 11293 } 11294 11295 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11296 llvm::Function *Fn) { 11297 ASTContext &C = CGM.getContext(); 11298 FD = FD->getMostRecentDecl(); 11299 // Map params to their positions in function decl. 11300 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11301 if (isa<CXXMethodDecl>(FD)) 11302 ParamPositions.try_emplace(FD, 0); 11303 unsigned ParamPos = ParamPositions.size(); 11304 for (const ParmVarDecl *P : FD->parameters()) { 11305 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11306 ++ParamPos; 11307 } 11308 while (FD) { 11309 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11310 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11311 // Mark uniform parameters. 11312 for (const Expr *E : Attr->uniforms()) { 11313 E = E->IgnoreParenImpCasts(); 11314 unsigned Pos; 11315 if (isa<CXXThisExpr>(E)) { 11316 Pos = ParamPositions[FD]; 11317 } else { 11318 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11319 ->getCanonicalDecl(); 11320 Pos = ParamPositions[PVD]; 11321 } 11322 ParamAttrs[Pos].Kind = Uniform; 11323 } 11324 // Get alignment info. 
11325 auto NI = Attr->alignments_begin(); 11326 for (const Expr *E : Attr->aligneds()) { 11327 E = E->IgnoreParenImpCasts(); 11328 unsigned Pos; 11329 QualType ParmTy; 11330 if (isa<CXXThisExpr>(E)) { 11331 Pos = ParamPositions[FD]; 11332 ParmTy = E->getType(); 11333 } else { 11334 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11335 ->getCanonicalDecl(); 11336 Pos = ParamPositions[PVD]; 11337 ParmTy = PVD->getType(); 11338 } 11339 ParamAttrs[Pos].Alignment = 11340 (*NI) 11341 ? (*NI)->EvaluateKnownConstInt(C) 11342 : llvm::APSInt::getUnsigned( 11343 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11344 .getQuantity()); 11345 ++NI; 11346 } 11347 // Mark linear parameters. 11348 auto SI = Attr->steps_begin(); 11349 auto MI = Attr->modifiers_begin(); 11350 for (const Expr *E : Attr->linears()) { 11351 E = E->IgnoreParenImpCasts(); 11352 unsigned Pos; 11353 // Rescaling factor needed to compute the linear parameter 11354 // value in the mangled name. 11355 unsigned PtrRescalingFactor = 1; 11356 if (isa<CXXThisExpr>(E)) { 11357 Pos = ParamPositions[FD]; 11358 } else { 11359 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11360 ->getCanonicalDecl(); 11361 Pos = ParamPositions[PVD]; 11362 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11363 PtrRescalingFactor = CGM.getContext() 11364 .getTypeSizeInChars(P->getPointeeType()) 11365 .getQuantity(); 11366 } 11367 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11368 ParamAttr.Kind = Linear; 11369 // Assuming a stride of 1, for `linear` without modifiers. 
11370 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11371 if (*SI) { 11372 Expr::EvalResult Result; 11373 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11374 if (const auto *DRE = 11375 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11376 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11377 ParamAttr.Kind = LinearWithVarStride; 11378 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11379 ParamPositions[StridePVD->getCanonicalDecl()]); 11380 } 11381 } 11382 } else { 11383 ParamAttr.StrideOrArg = Result.Val.getInt(); 11384 } 11385 } 11386 // If we are using a linear clause on a pointer, we need to 11387 // rescale the value of linear_step with the byte size of the 11388 // pointee type. 11389 if (Linear == ParamAttr.Kind) 11390 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11391 ++SI; 11392 ++MI; 11393 } 11394 llvm::APSInt VLENVal; 11395 SourceLocation ExprLoc; 11396 const Expr *VLENExpr = Attr->getSimdlen(); 11397 if (VLENExpr) { 11398 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11399 ExprLoc = VLENExpr->getExprLoc(); 11400 } 11401 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11402 if (CGM.getTriple().isX86()) { 11403 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11404 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11405 unsigned VLEN = VLENVal.getExtValue(); 11406 StringRef MangledName = Fn->getName(); 11407 if (CGM.getTarget().hasFeature("sve")) 11408 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11409 MangledName, 's', 128, Fn, ExprLoc); 11410 if (CGM.getTarget().hasFeature("neon")) 11411 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11412 MangledName, 'n', 128, Fn, ExprLoc); 11413 } 11414 } 11415 FD = FD->getPreviousDecl(); 11416 } 11417 } 11418 11419 namespace { 11420 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments stored for the runtime call emitted by Emit().
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \p CallArgs must contain exactly DoacrossFinArgs values; they are copied
  /// into this object.
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emits the stored runtime call; does nothing without an insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the call to __kmpc_doacross_init for loop directive \p D and push a
/// cleanup that calls __kmpc_doacross_fini.
///
/// One kmp_dim entry is filled per element of \p NumIterations. Nothing is
/// emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The record type is built on first use and kept in KmpDimTy afterwards.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices inside kmp_dim. Only the upper and stride fields are
  // stored explicitly below; the lower field keeps what
  // EmitNullInitialization wrote.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The matching __kmpc_doacross_fini call is registered as a cleanup for
  // both normal and EH exits; its arguments use the directive's end location.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit the runtime call for a doacross `depend` clause \p C:
/// __kmpc_doacross_post for `depend(source)`, __kmpc_doacross_wait for
/// `depend(sink)`.
///
/// The clause's loop data values are converted to a signed 64-bit integer
/// type and passed to the runtime through a temporary array.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: location, thread id, pointer to the first array element.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee with \p Args, with the debug location set through
/// ApplyDebugLocation::CreateDefaultArtificial for \p Loc.
///
/// \p Loc must be valid. If the callee is an llvm::Function that does not
/// throw, the call is emitted with EmitNounwindRuntimeCall; otherwise with
/// EmitRuntimeCall.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to the outlined function \p OutlinedFn; forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a declare-target function has been emitted: sets
/// HasEmittedDeclareTargetRegion when \p D is a FunctionDecl for which
/// isDeclareTargetDeclaration() holds. \p CGF is not used.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Return the address of the local variable for \p NativeParam.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the address to use for local variable \p VD when it needs special
/// handling, or an invalid Address otherwise.
///
/// - Null \p VD: invalid Address.
/// - \p VD has an OMPAllocateDeclAttr and isAllocatableDecl(VD) holds: storage
///   is obtained from __kmpc_alloc and a cleanup calling __kmpc_free is
///   pushed. The result is the untied task's "real" address when one is
///   recorded for \p VD, otherwise the newly allocated address.
/// - Otherwise: the address recorded for \p VD in the current function's
///   untied-task map, or an invalid Address if there is none.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the pair of addresses recorded for VD, if the current function
  // has an entry on the untied-task stack.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For a variable tracked in an untied task, the allocated pointer is
    // also stored into the recorded slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void*, allocator as void*). The
      // allocator expression is evaluated again at the cleanup point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Return true if the current function has an entry on the untied-task stack
/// and that entry contains \p VD.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// If \p S has `nontemporal` clauses, push a new set onto the runtime's
/// NontemporalDeclsStack holding every declaration named in those clauses'
/// private references. Otherwise do nothing.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      // A reference is either a plain declaration reference or a member of
      // the current object.
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

/// Pop the set pushed by the constructor, if one was pushed.
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// If \p LocalVars is non-empty, push a copy of it onto UntiedLocalVarsStack
/// and map the current function to the index of the pushed entry.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // The index is taken before the push, so it names the entry added below.
  // try_emplace does not overwrite an existing entry for the same function.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

/// Pop the stack entry pushed by the constructor, if one was pushed.
/// NOTE(review): the FunctionToUntiedTaskStackMap entry added by the
/// constructor is not erased here - confirm this is intended.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Return true if \p VD is contained in any set on NontemporalDeclsStack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collect into \p NeedToAddForLPCsAsDisabled the declarations used by \p S
/// that are currently tracked as active lastprivate conditionals.
///
/// Candidates are:
///   - for target-execution and tasking directives, the variables captured
///     (by reference or by copy) in the first capture region;
///   - scalar variables named through a DeclRefExpr in private,
///     firstprivate, lastprivate, reduction and linear clauses.
/// For each candidate, LastprivateConditionalStack is searched from its last
/// entry backwards. The first entry that knows the declaration decides: the
/// candidate is added to the output set only if that entry is not disabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses. The five loops below apply the same
  // filter to each clause kind: non-scalar references and references that are
  // not a DeclRefExpr are skipped.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Push a stack entry describing the `lastprivate(conditional: ...)` variables
/// of \p S.
///
/// Nothing is pushed unless the OpenMP version is at least 50 and \p S has a
/// lastprivate clause of the conditional kind. Each listed variable gets a
/// unique name generated with the "pl_cond" prefix; the entry also records
/// \p IVLVal and the current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Push a stack entry marked as disabled for the declarations that
/// tryToDisableInnerAnalysis reports for \p S.
///
/// Nothing is pushed for OpenMP versions below 50 or when no declaration is
/// reported. The disabled entry stores empty unique names.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Build the RAII object that disables lastprivate-conditional analysis for
/// the relevant variables of \p S (see the two-argument constructor).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

/// Pop the stack entry pushed by the constructor, if any. The assertions
/// check that the top entry's Disabled flag matches the recorded action.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Create (or reuse) the private storage for lastprivate conditional variable
/// \p VD in the current function and return the address of its value field.
///
/// The storage is an implicit record with two fields: the value, of \p VD's
/// non-reference type, and a char-typed "fired" flag. The record type, both
/// fields and the base lvalue are cached per function and per variable in
/// LastprivateConditionalToTypes. The flag is stored as zero on every call.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First request for VD in this function: build the record and a
    // temporary of that type, then cache them.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack entries to search; iterated from the last entry backwards.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Data of the reference that was found; returned by getFoundData().
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc is not read or written by any member of this class.
  SourceLocation Loc;

public:
  /// Returns true if \p E names a declaration tracked by an entry of LPM and
  /// the last entry that knows it is not disabled; the found data is
  /// recorded in that case.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same lookup as VisitDeclRefExpr, for the member declaration of \p E.
  /// Only member accesses whose base satisfies IsWrappedCXXThis are
  /// considered.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic traversal: visits the children of \p S and stops at the first
  /// one for which Visit returns true. Null children and children that are
  /// expressions but not glvalues are skipped.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expression, canonical declaration, unique name, IV lvalue,
  /// function) of the reference found by the last successful visit.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emit the conditional update of the saved value of a lastprivate
/// conditional variable.
///
/// Two internal variables are obtained from getOrCreateInternalVariable: one
/// named from \p UniqueDeclName plus "iv", holding the last recorded value of
/// the iteration variable, and one named \p UniqueDeclName, holding the last
/// recorded value of the variable. The emitted code stores the current values
/// of \p IVLVal and \p LVal into them when `last_iv <= iv`. It is wrapped in a
/// critical region named \p UniqueDeclName unless OpenMPSimd is enabled.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. The comparison is signed or unsigned to match
    // the type of the iteration variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
12132 // priv_a 12133 LValue LVal = CGF.EmitLValue(FoundE); 12134 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12135 FoundE->getExprLoc()); 12136 } 12137 12138 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12139 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12140 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12141 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12142 return; 12143 auto Range = llvm::reverse(LastprivateConditionalStack); 12144 auto It = llvm::find_if( 12145 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12146 if (It == Range.end() || It->Fn != CGF.CurFn) 12147 return; 12148 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12149 assert(LPCI != LastprivateConditionalToTypes.end() && 12150 "Lastprivates must be registered already."); 12151 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12152 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12153 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12154 for (const auto &Pair : It->DeclToUniqueName) { 12155 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12156 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12157 continue; 12158 auto I = LPCI->getSecond().find(Pair.first); 12159 assert(I != LPCI->getSecond().end() && 12160 "Lastprivate must be rehistered already."); 12161 // bool Cmp = priv_a.Fired != 0; 12162 LValue BaseLVal = std::get<3>(I->getSecond()); 12163 LValue FiredLVal = 12164 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12165 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12166 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12167 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12168 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12169 // if (Cmp) { 12170 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12171 CGF.EmitBlock(ThenBB); 
12172 Address Addr = CGF.GetAddrOfLocalVar(VD); 12173 LValue LVal; 12174 if (VD->getType()->isReferenceType()) 12175 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12176 AlignmentSource::Decl); 12177 else 12178 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12179 AlignmentSource::Decl); 12180 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12181 D.getBeginLoc()); 12182 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12183 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12184 // } 12185 } 12186 } 12187 12188 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12189 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12190 SourceLocation Loc) { 12191 if (CGF.getLangOpts().OpenMP < 50) 12192 return; 12193 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12194 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12195 "Unknown lastprivate conditional variable."); 12196 StringRef UniqueName = It->second; 12197 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12198 // The variable was not updated in the region - exit. 
12199 if (!GV) 12200 return; 12201 LValue LPLVal = CGF.MakeAddrLValue( 12202 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12203 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12204 CGF.EmitStoreOfScalar(Res, PrivLVal); 12205 } 12206 12207 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12208 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12209 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12210 llvm_unreachable("Not supported in SIMD-only mode"); 12211 } 12212 12213 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12214 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12215 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12216 llvm_unreachable("Not supported in SIMD-only mode"); 12217 } 12218 12219 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12220 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12221 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12222 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12223 bool Tied, unsigned &NumberOfParts) { 12224 llvm_unreachable("Not supported in SIMD-only mode"); 12225 } 12226 12227 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12228 SourceLocation Loc, 12229 llvm::Function *OutlinedFn, 12230 ArrayRef<llvm::Value *> CapturedVars, 12231 const Expr *IfCond) { 12232 llvm_unreachable("Not supported in SIMD-only mode"); 12233 } 12234 12235 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12236 CodeGenFunction &CGF, StringRef CriticalName, 12237 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12238 const Expr *Hint) { 12239 llvm_unreachable("Not supported in SIMD-only mode"); 12240 } 12241 12242 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12243 const RegionCodeGenTy &MasterOpGen, 12244 SourceLocation Loc) { 12245 llvm_unreachable("Not supported in SIMD-only mode"); 12246 } 12247 12248 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12249 SourceLocation Loc) { 12250 llvm_unreachable("Not supported in SIMD-only mode"); 12251 } 12252 12253 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12254 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12255 SourceLocation Loc) { 12256 llvm_unreachable("Not supported in SIMD-only mode"); 12257 } 12258 12259 void CGOpenMPSIMDRuntime::emitSingleRegion( 12260 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12261 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12262 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12263 ArrayRef<const Expr *> AssignmentOps) { 12264 llvm_unreachable("Not supported in SIMD-only mode"); 12265 } 12266 12267 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12268 const RegionCodeGenTy &OrderedOpGen, 12269 SourceLocation Loc, 12270 bool IsThreads) { 12271 llvm_unreachable("Not supported in SIMD-only mode"); 12272 } 12273 12274 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12275 SourceLocation Loc, 12276 OpenMPDirectiveKind Kind, 12277 bool EmitChecks, 12278 bool ForceSimpleCall) { 12279 llvm_unreachable("Not supported in SIMD-only mode"); 12280 } 12281 12282 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12283 CodeGenFunction &CGF, SourceLocation Loc, 12284 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12285 bool Ordered, const DispatchRTInput &DispatchValues) { 12286 llvm_unreachable("Not supported in SIMD-only mode"); 12287 } 12288 12289 void CGOpenMPSIMDRuntime::emitForStaticInit( 12290 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12291 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12292 llvm_unreachable("Not supported in SIMD-only mode"); 12293 } 12294 12295 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12296 CodeGenFunction &CGF, SourceLocation Loc, 12297 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12298 llvm_unreachable("Not supported in SIMD-only mode"); 12299 } 12300 12301 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12302 SourceLocation Loc, 12303 unsigned IVSize, 12304 bool IVSigned) { 12305 llvm_unreachable("Not supported in SIMD-only mode"); 12306 } 12307 12308 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12309 SourceLocation Loc, 12310 OpenMPDirectiveKind DKind) { 12311 llvm_unreachable("Not supported in SIMD-only mode"); 12312 } 12313 12314 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12315 SourceLocation Loc, 12316 unsigned IVSize, bool IVSigned, 12317 Address IL, Address LB, 12318 Address UB, Address ST) { 12319 llvm_unreachable("Not supported in SIMD-only mode"); 12320 } 12321 12322 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12323 llvm::Value *NumThreads, 12324 SourceLocation Loc) { 12325 llvm_unreachable("Not supported in SIMD-only mode"); 12326 } 12327 12328 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12329 ProcBindKind ProcBind, 12330 SourceLocation Loc) { 12331 llvm_unreachable("Not supported in SIMD-only mode"); 12332 } 12333 12334 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12335 const VarDecl *VD, 12336 Address VDAddr, 12337 SourceLocation Loc) { 12338 llvm_unreachable("Not supported in SIMD-only mode"); 12339 } 12340 12341 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12342 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12343 CodeGenFunction *CGF) { 12344 llvm_unreachable("Not supported in SIMD-only mode"); 12345 } 12346 12347 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12348 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12349 llvm_unreachable("Not supported in SIMD-only mode"); 12350 } 12351 12352 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12353 ArrayRef<const Expr *> 
Vars, 12354 SourceLocation Loc, 12355 llvm::AtomicOrdering AO) { 12356 llvm_unreachable("Not supported in SIMD-only mode"); 12357 } 12358 12359 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12360 const OMPExecutableDirective &D, 12361 llvm::Function *TaskFunction, 12362 QualType SharedsTy, Address Shareds, 12363 const Expr *IfCond, 12364 const OMPTaskDataTy &Data) { 12365 llvm_unreachable("Not supported in SIMD-only mode"); 12366 } 12367 12368 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12369 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12370 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12371 const Expr *IfCond, const OMPTaskDataTy &Data) { 12372 llvm_unreachable("Not supported in SIMD-only mode"); 12373 } 12374 12375 void CGOpenMPSIMDRuntime::emitReduction( 12376 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12377 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12378 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12379 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12380 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12381 ReductionOps, Options); 12382 } 12383 12384 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12385 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12386 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12387 llvm_unreachable("Not supported in SIMD-only mode"); 12388 } 12389 12390 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12391 SourceLocation Loc, 12392 bool IsWorksharingReduction) { 12393 llvm_unreachable("Not supported in SIMD-only mode"); 12394 } 12395 12396 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12397 SourceLocation Loc, 12398 ReductionCodeGen &RCG, 12399 unsigned N) { 12400 llvm_unreachable("Not supported in SIMD-only mode"); 12401 } 12402 
12403 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12404 SourceLocation Loc, 12405 llvm::Value *ReductionsPtr, 12406 LValue SharedLVal) { 12407 llvm_unreachable("Not supported in SIMD-only mode"); 12408 } 12409 12410 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12411 SourceLocation Loc) { 12412 llvm_unreachable("Not supported in SIMD-only mode"); 12413 } 12414 12415 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12416 CodeGenFunction &CGF, SourceLocation Loc, 12417 OpenMPDirectiveKind CancelRegion) { 12418 llvm_unreachable("Not supported in SIMD-only mode"); 12419 } 12420 12421 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12422 SourceLocation Loc, const Expr *IfCond, 12423 OpenMPDirectiveKind CancelRegion) { 12424 llvm_unreachable("Not supported in SIMD-only mode"); 12425 } 12426 12427 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12428 const OMPExecutableDirective &D, StringRef ParentName, 12429 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12430 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12431 llvm_unreachable("Not supported in SIMD-only mode"); 12432 } 12433 12434 void CGOpenMPSIMDRuntime::emitTargetCall( 12435 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12436 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12437 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12438 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12439 const OMPLoopDirective &D)> 12440 SizeEmitter) { 12441 llvm_unreachable("Not supported in SIMD-only mode"); 12442 } 12443 12444 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12445 llvm_unreachable("Not supported in SIMD-only mode"); 12446 } 12447 12448 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12449 llvm_unreachable("Not supported in SIMD-only mode"); 12450 } 12451 12452 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12453 return false; 12454 } 12455 12456 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12457 const OMPExecutableDirective &D, 12458 SourceLocation Loc, 12459 llvm::Function *OutlinedFn, 12460 ArrayRef<llvm::Value *> CapturedVars) { 12461 llvm_unreachable("Not supported in SIMD-only mode"); 12462 } 12463 12464 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12465 const Expr *NumTeams, 12466 const Expr *ThreadLimit, 12467 SourceLocation Loc) { 12468 llvm_unreachable("Not supported in SIMD-only mode"); 12469 } 12470 12471 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12472 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12473 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12474 llvm_unreachable("Not supported in SIMD-only mode"); 12475 } 12476 12477 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12478 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12479 const Expr *Device) { 12480 llvm_unreachable("Not supported in SIMD-only mode"); 12481 } 12482 12483 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12484 const OMPLoopDirective &D, 12485 ArrayRef<Expr *> NumIterations) { 12486 llvm_unreachable("Not supported in SIMD-only mode"); 12487 } 12488 12489 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12490 const OMPDependClause *C) { 12491 llvm_unreachable("Not supported in SIMD-only mode"); 12492 } 12493 12494 const VarDecl * 12495 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12496 const VarDecl *NativeParam) const { 12497 llvm_unreachable("Not supported in SIMD-only mode"); 12498 } 12499 12500 Address 12501 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12502 const VarDecl *NativeParam, 12503 const VarDecl *TargetParam) const { 12504 llvm_unreachable("Not supported in SIMD-only mode"); 12505 } 12506