//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Every instance is tagged with the CR_OpenMP captured-region kind, which is
/// what classof() tests, and additionally records which flavour of OpenMP
/// region it is (see CGOpenMPRegionKind) so that the derived classes can be
/// told apart.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Flavour of the region, reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind Directive kind reported by getDirectiveKind().
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info that is not bound to any captured statement; only the
  /// CR_OpenMP tag is passed to the base class. The remaining parameters have
  /// the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing; derived region kinds override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Flavour of this region, as passed to the constructor.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Directive kind this region was created with.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Value of the HasCancel flag this region was created with.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any captured-region info tagged CR_OpenMP is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Flavour of the region; used by the derived classes' classof().
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind the region was created with.
  OpenMPDirectiveKind Kind;
  /// Flag returned by hasCancel().
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
///
/// Region info tagged ParallelOutlinedRegion. It carries the thread id
/// variable and the name to use for the capture helper.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted).
  /// \param CodeGen Code generation callback for the region.
  /// \param Kind Directive kind of the region.
  /// \param HasCancel Flag reported by hasCancel().
  /// \param HelperName Name returned by getHelperName(). Stored as a
  /// StringRef, i.e. without copying the characters.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper, as supplied to the constructor.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 918 if (needCleanups(N)) { 919 PrivateAddr = CGF.Builder.CreateElementBitCast( 920 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 921 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 922 } 923 } 924 925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 926 LValue BaseLV) { 927 BaseTy = BaseTy.getNonReferenceType(); 928 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 929 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 930 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 931 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 932 } else { 933 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 934 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 935 } 936 BaseTy = BaseTy->getPointeeType(); 937 } 938 return CGF.MakeAddrLValue( 939 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 940 CGF.ConvertTypeForMem(ElTy)), 941 BaseLV.getType(), BaseLV.getBaseInfo(), 942 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 943 } 944 945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 946 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 947 llvm::Value *Addr) { 948 Address Tmp = Address::invalid(); 949 Address TopTmp = Address::invalid(); 950 Address MostTopTmp = Address::invalid(); 951 BaseTy = BaseTy.getNonReferenceType(); 952 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 953 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 954 Tmp = CGF.CreateMemTemp(BaseTy); 955 if (TopTmp.isValid()) 956 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 957 else 958 MostTopTmp = Tmp; 959 TopTmp = Tmp; 960 BaseTy = BaseTy->getPointeeType(); 961 } 962 llvm::Type *Ty = BaseLVType; 963 if (Tmp.isValid()) 964 Ty = Tmp.getElementType(); 965 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 966 if (Tmp.isValid()) { 967 CGF.Builder.CreateStore(Addr, Tmp); 968 return MostTopTmp; 969 } 970 return 
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
1039 CGF.EHStack.pushTerminate(); 1040 CodeGen(CGF); 1041 CGF.EHStack.popTerminate(); 1042 } 1043 1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1045 CodeGenFunction &CGF) { 1046 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1047 getThreadIDVariable()->getType(), 1048 AlignmentSource::Decl); 1049 } 1050 1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1052 QualType FieldTy) { 1053 auto *Field = FieldDecl::Create( 1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1057 Field->setAccess(AS_public); 1058 DC->addDecl(Field); 1059 return Field; 1060 } 1061 1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1063 StringRef Separator) 1064 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1065 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1066 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1067 1068 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1069 OMPBuilder.initialize(); 1070 loadOffloadInfoMetadata(); 1071 } 1072 1073 void CGOpenMPRuntime::clear() { 1074 InternalVars.clear(); 1075 // Clean non-target variable declarations possibly used only in debug info. 
1076 for (const auto &Data : EmittedNonTargetVariables) { 1077 if (!Data.getValue().pointsToAliveValue()) 1078 continue; 1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1080 if (!GV) 1081 continue; 1082 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1083 continue; 1084 GV->eraseFromParent(); 1085 } 1086 } 1087 1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1089 SmallString<128> Buffer; 1090 llvm::raw_svector_ostream OS(Buffer); 1091 StringRef Sep = FirstSeparator; 1092 for (StringRef Part : Parts) { 1093 OS << Sep << Part; 1094 Sep = Separator; 1095 } 1096 return std::string(OS.str()); 1097 } 1098 1099 static llvm::Function * 1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1101 const Expr *CombinerInitializer, const VarDecl *In, 1102 const VarDecl *Out, bool IsCombiner) { 1103 // void .omp_combiner.(Ty *in, Ty *out); 1104 ASTContext &C = CGM.getContext(); 1105 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1106 FunctionArgList Args; 1107 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1110 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1111 Args.push_back(&OmpOutParm); 1112 Args.push_back(&OmpInParm); 1113 const CGFunctionInfo &FnInfo = 1114 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1115 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1116 std::string Name = CGM.getOpenMPRuntime().getName( 1117 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1118 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1119 Name, &CGM.getModule()); 1120 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1121 if (CGM.getLangOpts().Optimize) { 1122 Fn->removeFnAttr(llvm::Attribute::NoInline); 1123 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1124 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1125 } 1126 CodeGenFunction CGF(CGM); 1127 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1128 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1129 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1130 Out->getLocation()); 1131 CodeGenFunction::OMPPrivateScope Scope(CGF); 1132 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1133 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1134 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1135 .getAddress(CGF); 1136 }); 1137 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1138 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1139 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1140 .getAddress(CGF); 1141 }); 1142 (void)Scope.Privatize(); 1143 if (!IsCombiner && Out->hasInit() && 1144 !CGF.isTrivialInitializer(Out->getInit())) { 1145 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1146 Out->getType().getQualifiers(), 1147 /*IsInitializer=*/true); 1148 } 1149 if (CombinerInitializer) 1150 CGF.EmitIgnoredExpr(CombinerInitializer); 1151 Scope.ForceCleanup(); 1152 CGF.FinishFunction(); 1153 return Fn; 1154 } 1155 1156 void CGOpenMPRuntime::emitUserDefinedReduction( 1157 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1158 if (UDRMap.count(D) > 0) 1159 return; 1160 llvm::Function *Combiner = emitCombinerOrInitializer( 1161 CGM, D->getType(), D->getCombiner(), 1162 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1163 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1164 /*IsCombiner=*/true); 1165 llvm::Function *Initializer = nullptr; 1166 if (const Expr *Init = D->getInitializer()) { 1167 Initializer = emitCombinerOrInitializer( 1168 CGM, D->getType(), 1169 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1170 : nullptr, 1171 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1173 /*IsCombiner=*/false); 1174 } 1175 UDRMap.try_emplace(D, Combiner, Initializer); 1176 if (CGF) { 1177 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1178 Decls.second.push_back(D); 1179 } 1180 } 1181 1182 std::pair<llvm::Function *, llvm::Function *> 1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1184 auto I = UDRMap.find(D); 1185 if (I != UDRMap.end()) 1186 return I->second; 1187 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1188 return UDRMap.lookup(D); 1189 } 1190 1191 namespace { 1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1193 // Builder if one is present. 1194 struct PushAndPopStackRAII { 1195 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1196 bool HasCancel) 1197 : OMPBuilder(OMPBuilder) { 1198 if (!OMPBuilder) 1199 return; 1200 1201 // The following callback is the crucial part of clangs cleanup process. 1202 // 1203 // NOTE: 1204 // Once the OpenMPIRBuilder is used to create parallel regions (and 1205 // similar), the cancellation destination (Dest below) is determined via 1206 // IP. That means if we have variables to finalize we split the block at IP, 1207 // use the new block (=BB) as destination to build a JumpDest (via 1208 // getJumpDestInCurrentScope(BB)) which then is fed to 1209 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1210 // to push & pop an FinalizationInfo object. 
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel(); 1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1256 HasCancel = OPFD->hasCancel(); 1257 else if (const auto *OPFD = 1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else if (const auto *OPFD = 1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1265 // parallel region to make cancellation barriers work properly. 1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1269 HasCancel, OutlinedHelperName); 1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1272 } 1273 1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1275 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1276 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1278 return emitParallelOrTeamsOutlinedFunction( 1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1280 } 1281 1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1283 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1285 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1286 return emitParallelOrTeamsOutlinedFunction( 1287 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1293 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1294 bool Tied, unsigned &NumberOfParts) { 1295 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1296 PrePostActionTy &) { 1297 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1298 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1299 llvm::Value *TaskArgs[] = { 1300 UpLoc, ThreadID, 1301 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1302 TaskTVar->getType()->castAs<PointerType>()) 1303 .getPointer(CGF)}; 1304 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1305 CGM.getModule(), OMPRTL___kmpc_omp_task), 1306 TaskArgs); 1307 }; 1308 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1309 UntiedCodeGen); 1310 CodeGen.setAction(Action); 1311 assert(!ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 for tasks"); 1313 const OpenMPDirectiveKind Region = 1314 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1315 : OMPD_task; 1316 const CapturedStmt *CS = D.getCapturedStmt(Region); 1317 bool HasCancel = false; 1318 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1319 HasCancel = TD->hasCancel(); 1320 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1321 HasCancel = TD->hasCancel(); 1322 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1323 HasCancel = TD->hasCancel(); 1324 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 1327 CodeGenFunction CGF(CGM, true); 1328 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1329 InnermostKind, HasCancel, Action); 1330 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1331 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1332 if (!Tied) 1333 NumberOfParts = Action.getNumberOfParts(); 1334 return Res; 1335 } 1336 1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1338 
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1436 Line, Column); 1437 } 1438 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1439 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1440 Reserved2Flags); 1441 } 1442 1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1444 SourceLocation Loc) { 1445 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1446 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1447 // the clang invariants used below might be broken. 1448 if (CGM.getLangOpts().OpenMPIRBuilder) { 1449 SmallString<128> Buffer; 1450 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1451 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1452 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1453 return OMPBuilder.getOrCreateThreadID( 1454 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1455 } 1456 1457 llvm::Value *ThreadID = nullptr; 1458 // Check whether we've already cached a load of the thread id in this 1459 // function. 1460 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1461 if (I != OpenMPLocThreadIDMap.end()) { 1462 ThreadID = I->second.ThreadID; 1463 if (ThreadID != nullptr) 1464 return ThreadID; 1465 } 1466 // If exceptions are enabled, do not use parameter to avoid possible crash. 1467 if (auto *OMPRegionInfo = 1468 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1469 if (OMPRegionInfo->getThreadIDVariable()) { 1470 // Check if this an outlined function with thread id passed as argument. 
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 } 1530 1531 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1532 return OMPBuilder.IdentPtr; 1533 } 1534 1535 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1536 if (!Kmpc_MicroTy) { 1537 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1538 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1539 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1540 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1541 } 1542 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1543 } 1544 1545 llvm::FunctionCallee 1546 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1547 assert((IVSize == 32 || IVSize == 64) && 1548 "IV size is not compatible with the omp runtime"); 1549 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1550 : "__kmpc_for_static_init_4u") 1551 : (IVSigned ? "__kmpc_for_static_init_8" 1552 : "__kmpc_for_static_init_8u"); 1553 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1554 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1555 llvm::Type *TypeParams[] = { 1556 getIdentTyPointerTy(), // loc 1557 CGM.Int32Ty, // tid 1558 CGM.Int32Ty, // schedtype 1559 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1560 PtrTy, // p_lower 1561 PtrTy, // p_upper 1562 PtrTy, // p_stride 1563 ITy, // incr 1564 ITy // chunk 1565 }; 1566 auto *FnTy = 1567 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1568 return CGM.CreateRuntimeFunction(FnTy, Name); 1569 } 1570 1571 llvm::FunctionCallee 1572 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1573 assert((IVSize == 32 || IVSize == 64) && 1574 "IV size is not compatible with the omp runtime"); 1575 StringRef Name = 1576 IVSize == 32 1577 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1578 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1579 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1580 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1581 CGM.Int32Ty, // tid 1582 CGM.Int32Ty, // schedtype 1583 ITy, // lower 1584 ITy, // upper 1585 ITy, // stride 1586 ITy // chunk 1587 }; 1588 auto *FnTy = 1589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1590 return CGM.CreateRuntimeFunction(FnTy, Name); 1591 } 1592 1593 llvm::FunctionCallee 1594 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1595 assert((IVSize == 32 || IVSize == 64) && 1596 "IV size is not compatible with the omp runtime"); 1597 StringRef Name = 1598 IVSize == 32 1599 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1600 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1601 llvm::Type *TypeParams[] = { 1602 getIdentTyPointerTy(), // loc 1603 CGM.Int32Ty, // tid 1604 }; 1605 auto *FnTy = 1606 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1607 return CGM.CreateRuntimeFunction(FnTy, Name); 1608 } 1609 1610 llvm::FunctionCallee 1611 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1612 assert((IVSize == 32 || IVSize == 64) && 1613 "IV size is not compatible with the omp runtime"); 1614 StringRef Name = 1615 IVSize == 32 1616 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1617 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1618 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1619 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1620 llvm::Type *TypeParams[] = { 1621 getIdentTyPointerTy(), // loc 1622 CGM.Int32Ty, // tid 1623 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1624 PtrTy, // p_lower 1625 PtrTy, // p_upper 1626 PtrTy // p_stride 1627 }; 1628 auto *FnTy = 1629 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1630 return CGM.CreateRuntimeFunction(FnTy, Name); 1631 } 1632 1633 /// Obtain information that uniquely identifies a target entry. This 1634 /// consists of the file and device IDs as well as line number associated with 1635 /// the relevant entry source location. 1636 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1637 unsigned &DeviceID, unsigned &FileID, 1638 unsigned &LineNum) { 1639 SourceManager &SM = C.getSourceManager(); 1640 1641 // The loc should be always valid and have a file ID (the user cannot use 1642 // #pragma directives in macros) 1643 1644 assert(Loc.isValid() && "Source location is expected to be always valid."); 1645 1646 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1647 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1648 1649 llvm::sys::fs::UniqueID ID; 1650 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1651 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1652 << PLoc.getFilename() << EC.message(); 1653 1654 DeviceID = ID.getDevice(); 1655 FileID = ID.getFile(); 1656 LineNum = PLoc.getLine(); 1657 } 1658 1659 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1660 if (CGM.getLangOpts().OpenMPSimd) 1661 return Address::invalid(); 1662 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1663 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1664 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1665 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1666 HasRequiresUnifiedSharedMemory))) { 1667 SmallString<64> PtrName; 1668 { 1669 
llvm::raw_svector_ostream OS(PtrName); 1670 OS << CGM.getMangledName(GlobalDecl(VD)); 1671 if (!VD->isExternallyVisible()) { 1672 unsigned DeviceID, FileID, Line; 1673 getTargetEntryUniqueInfo(CGM.getContext(), 1674 VD->getCanonicalDecl()->getBeginLoc(), 1675 DeviceID, FileID, Line); 1676 OS << llvm::format("_%x", FileID); 1677 } 1678 OS << "_decl_tgt_ref_ptr"; 1679 } 1680 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1681 if (!Ptr) { 1682 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1683 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1684 PtrName); 1685 1686 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1687 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1688 1689 if (!CGM.getLangOpts().OpenMPIsDevice) 1690 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1691 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1692 } 1693 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1694 } 1695 return Address::invalid(); 1696 } 1697 1698 llvm::Constant * 1699 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1700 assert(!CGM.getLangOpts().OpenMPUseTLS || 1701 !CGM.getContext().getTargetInfo().isTLSSupported()); 1702 // Lookup the entry, lazily creating it if necessary. 
1703 std::string Suffix = getName({"cache", ""}); 1704 return getOrCreateInternalVariable( 1705 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1706 } 1707 1708 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1709 const VarDecl *VD, 1710 Address VDAddr, 1711 SourceLocation Loc) { 1712 if (CGM.getLangOpts().OpenMPUseTLS && 1713 CGM.getContext().getTargetInfo().isTLSSupported()) 1714 return VDAddr; 1715 1716 llvm::Type *VarTy = VDAddr.getElementType(); 1717 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1718 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1719 CGM.Int8PtrTy), 1720 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1721 getOrCreateThreadPrivateCache(VD)}; 1722 return Address(CGF.EmitRuntimeCall( 1723 OMPBuilder.getOrCreateRuntimeFunction( 1724 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1725 Args), 1726 VDAddr.getAlignment()); 1727 } 1728 1729 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1730 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1731 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1732 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1733 // library. 1734 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1736 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1737 OMPLoc); 1738 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1739 // to register constructor/destructor for variable. 
1740 llvm::Value *Args[] = { 1741 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1742 Ctor, CopyCtor, Dtor}; 1743 CGF.EmitRuntimeCall( 1744 OMPBuilder.getOrCreateRuntimeFunction( 1745 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1746 Args); 1747 } 1748 1749 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1750 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1751 bool PerformInit, CodeGenFunction *CGF) { 1752 if (CGM.getLangOpts().OpenMPUseTLS && 1753 CGM.getContext().getTargetInfo().isTLSSupported()) 1754 return nullptr; 1755 1756 VD = VD->getDefinition(CGM.getContext()); 1757 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1758 QualType ASTTy = VD->getType(); 1759 1760 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1761 const Expr *Init = VD->getAnyInitializer(); 1762 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1763 // Generate function that re-emits the declaration's initializer into the 1764 // threadprivate copy of the variable VD 1765 CodeGenFunction CtorCGF(CGM); 1766 FunctionArgList Args; 1767 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1768 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1769 ImplicitParamDecl::Other); 1770 Args.push_back(&Dst); 1771 1772 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1773 CGM.getContext().VoidPtrTy, Args); 1774 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1775 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1776 llvm::Function *Fn = 1777 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1778 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1779 Args, Loc, Loc); 1780 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1781 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1782 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1783 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1784 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1785 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1786 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1787 /*IsInitializer=*/true); 1788 ArgVal = CtorCGF.EmitLoadOfScalar( 1789 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1790 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1791 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1792 CtorCGF.FinishFunction(); 1793 Ctor = Fn; 1794 } 1795 if (VD->getType().isDestructedType() != QualType::DK_none) { 1796 // Generate function that emits destructor call for the threadprivate copy 1797 // of the variable VD 1798 CodeGenFunction DtorCGF(CGM); 1799 FunctionArgList Args; 1800 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1801 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1802 ImplicitParamDecl::Other); 1803 Args.push_back(&Dst); 1804 1805 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1806 CGM.getContext().VoidTy, Args); 1807 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1808 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1809 llvm::Function *Fn = 1810 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1811 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1812 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1813 Loc, Loc); 1814 // Create a scope with an artificial location for the body of this function. 1815 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1816 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1817 DtorCGF.GetAddrOfLocalVar(&Dst), 1818 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1819 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1820 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1821 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1822 DtorCGF.FinishFunction(); 1823 Dtor = Fn; 1824 } 1825 // Do not emit init function if it is not required. 
1826 if (!Ctor && !Dtor) 1827 return nullptr; 1828 1829 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1830 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1831 /*isVarArg=*/false) 1832 ->getPointerTo(); 1833 // Copying constructor for the threadprivate variable. 1834 // Must be NULL - reserved by runtime, but currently it requires that this 1835 // parameter is always NULL. Otherwise it fires assertion. 1836 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1837 if (Ctor == nullptr) { 1838 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1839 /*isVarArg=*/false) 1840 ->getPointerTo(); 1841 Ctor = llvm::Constant::getNullValue(CtorTy); 1842 } 1843 if (Dtor == nullptr) { 1844 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1845 /*isVarArg=*/false) 1846 ->getPointerTo(); 1847 Dtor = llvm::Constant::getNullValue(DtorTy); 1848 } 1849 if (!CGF) { 1850 auto *InitFunctionTy = 1851 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1852 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1853 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1854 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1855 CodeGenFunction InitCGF(CGM); 1856 FunctionArgList ArgList; 1857 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1858 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1859 Loc, Loc); 1860 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1861 InitCGF.FinishFunction(); 1862 return InitFunction; 1863 } 1864 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1865 } 1866 return nullptr; 1867 } 1868 1869 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1870 llvm::GlobalVariable *Addr, 1871 bool PerformInit) { 1872 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1873 !CGM.getLangOpts().OpenMPIsDevice) 1874 return false; 1875 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1876 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1877 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1878 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1879 HasRequiresUnifiedSharedMemory)) 1880 return CGM.getLangOpts().OpenMPIsDevice; 1881 VD = VD->getDefinition(CGM.getContext()); 1882 assert(VD && "Unknown VarDecl"); 1883 1884 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1885 return CGM.getLangOpts().OpenMPIsDevice; 1886 1887 QualType ASTTy = VD->getType(); 1888 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1889 1890 // Produce the unique prefix to identify the new target regions. We use 1891 // the source location of the variable declaration which we know to not 1892 // conflict with any target region. 1893 unsigned DeviceID; 1894 unsigned FileID; 1895 unsigned Line; 1896 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1897 SmallString<128> Buffer, Out; 1898 { 1899 llvm::raw_svector_ostream OS(Buffer); 1900 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1901 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1902 } 1903 1904 const Expr *Init = VD->getAnyInitializer(); 1905 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1906 llvm::Constant *Ctor; 1907 llvm::Constant *ID; 1908 if (CGM.getLangOpts().OpenMPIsDevice) { 1909 // Generate function that re-emits the declaration's initializer into 1910 // the threadprivate copy of the variable VD 1911 CodeGenFunction CtorCGF(CGM); 1912 1913 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1914 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1915 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1916 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1917 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1918 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1919 FunctionArgList(), Loc, Loc); 1920 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1921 CtorCGF.EmitAnyExprToMem(Init, 1922 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1923 Init->getType().getQualifiers(), 1924 /*IsInitializer=*/true); 1925 CtorCGF.FinishFunction(); 1926 Ctor = Fn; 1927 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1928 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1929 } else { 1930 Ctor = new llvm::GlobalVariable( 1931 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1932 llvm::GlobalValue::PrivateLinkage, 1933 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1934 ID = Ctor; 1935 } 1936 1937 // Register the information for the entry associated with the constructor. 1938 Out.clear(); 1939 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1940 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1941 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1942 } 1943 if (VD->getType().isDestructedType() != QualType::DK_none) { 1944 llvm::Constant *Dtor; 1945 llvm::Constant *ID; 1946 if (CGM.getLangOpts().OpenMPIsDevice) { 1947 // Generate function that emits destructor call for the threadprivate 1948 // copy of the variable VD 1949 CodeGenFunction DtorCGF(CGM); 1950 1951 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1952 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1953 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1954 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1955 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1956 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1957 FunctionArgList(), Loc, Loc); 1958 // Create a scope with an artificial location for the body of this 1959 // function. 
1960 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1961 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1962 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1963 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1964 DtorCGF.FinishFunction(); 1965 Dtor = Fn; 1966 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1967 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1968 } else { 1969 Dtor = new llvm::GlobalVariable( 1970 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1971 llvm::GlobalValue::PrivateLinkage, 1972 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1973 ID = Dtor; 1974 } 1975 // Register the information for the entry associated with the destructor. 1976 Out.clear(); 1977 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1978 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1979 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1980 } 1981 return CGM.getLangOpts().OpenMPIsDevice; 1982 } 1983 1984 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1985 QualType VarType, 1986 StringRef Name) { 1987 std::string Suffix = getName({"artificial", ""}); 1988 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1989 llvm::Value *GAddr = 1990 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1991 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1992 CGM.getTarget().isTLSSupported()) { 1993 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1994 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 1995 } 1996 std::string CacheSuffix = getName({"cache", ""}); 1997 llvm::Value *Args[] = { 1998 emitUpdateLocation(CGF, SourceLocation()), 1999 getThreadID(CGF, SourceLocation()), 2000 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2001 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2002 /*isSigned=*/false), 2003 
getOrCreateInternalVariable( 2004 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2005 return Address( 2006 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2007 CGF.EmitRuntimeCall( 2008 OMPBuilder.getOrCreateRuntimeFunction( 2009 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2010 Args), 2011 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2012 CGM.getContext().getTypeAlignInChars(VarType)); 2013 } 2014 2015 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2016 const RegionCodeGenTy &ThenGen, 2017 const RegionCodeGenTy &ElseGen) { 2018 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2019 2020 // If the condition constant folds and can be elided, try to avoid emitting 2021 // the condition and the dead arm of the if/else. 2022 bool CondConstant; 2023 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2024 if (CondConstant) 2025 ThenGen(CGF); 2026 else 2027 ElseGen(CGF); 2028 return; 2029 } 2030 2031 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2032 // emit the conditional branch. 2033 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2034 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2035 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2036 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2037 2038 // Emit the 'then' code. 2039 CGF.EmitBlock(ThenBlock); 2040 ThenGen(CGF); 2041 CGF.EmitBranch(ContBlock); 2042 // Emit the 'else' code if present. 2043 // There is no need to emit line number for unconditional branch. 2044 (void)ApplyDebugLocation::CreateEmpty(CGF); 2045 CGF.EmitBlock(ElseBlock); 2046 ElseGen(CGF); 2047 // There is no need to emit line number for unconditional branch. 2048 (void)ApplyDebugLocation::CreateEmpty(CGF); 2049 CGF.EmitBranch(ContBlock); 2050 // Emit the continuation block for code after the if. 
2051 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2052 } 2053 2054 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2055 llvm::Function *OutlinedFn, 2056 ArrayRef<llvm::Value *> CapturedVars, 2057 const Expr *IfCond) { 2058 if (!CGF.HaveInsertPoint()) 2059 return; 2060 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2061 auto &M = CGM.getModule(); 2062 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2063 this](CodeGenFunction &CGF, PrePostActionTy &) { 2064 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2065 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2066 llvm::Value *Args[] = { 2067 RTLoc, 2068 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2069 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2070 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2071 RealArgs.append(std::begin(Args), std::end(Args)); 2072 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2073 2074 llvm::FunctionCallee RTLFn = 2075 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2076 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2077 }; 2078 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2079 this](CodeGenFunction &CGF, PrePostActionTy &) { 2080 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2081 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2082 // Build calls: 2083 // __kmpc_serialized_parallel(&Loc, GTid); 2084 llvm::Value *Args[] = {RTLoc, ThreadID}; 2085 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2086 M, OMPRTL___kmpc_serialized_parallel), 2087 Args); 2088 2089 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2090 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2091 Address ZeroAddrBound = 2092 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2093 /*Name=*/".bound.zero.addr"); 2094 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2095 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2096 // ThreadId 
for serialized parallels is 0. 2097 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2098 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2099 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2100 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2101 2102 // __kmpc_end_serialized_parallel(&Loc, GTid); 2103 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2104 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2105 M, OMPRTL___kmpc_end_serialized_parallel), 2106 EndArgs); 2107 }; 2108 if (IfCond) { 2109 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2110 } else { 2111 RegionCodeGenTy ThenRCG(ThenGen); 2112 ThenRCG(CGF); 2113 } 2114 } 2115 2116 // If we're inside an (outlined) parallel region, use the region info's 2117 // thread-ID variable (it is passed in a first argument of the outlined function 2118 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2119 // regular serial code region, get thread ID by calling kmp_int32 2120 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2121 // return the address of that temp. 
/// Returns an address that holds the global thread id. When \p CGF is inside
/// an OpenMP region whose info provides a thread-id variable, that variable's
/// address is used; otherwise the value of getThreadID() is stored into a
/// fresh ".threadid_temp." temporary and the temporary's address is returned.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Returns the internal global variable called \p Name, creating it on first
/// use as a zero-initialized, non-constant global of type \p Ty with common
/// linkage in \p AddressSpace. Variables are cached by name in InternalVars;
/// asking for an existing name with a different type trips the assertion.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace leaves an existing entry untouched and inserts a null
  // placeholder for a new name.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Returns the lock variable for the critical region \p CriticalName. Its name
/// is built by getName() from "gomp_critical_user_" + \p CriticalName and
/// "var", and its type is KmpCriticalNameTy.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to EnterCallee and Exit() emits a call to ExitCallee.
/// In conditional mode the code following Enter() is guarded by the (non-null)
/// result of the enter call, and Done() must be called afterwards to close the
/// guarded region. The argument lists are held as ArrayRefs, so the arrays
/// they refer to have to outlive this object.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block of the conditional region; assigned only by Enter()
  // when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emits the enter call and, in conditional mode, a branch that skips to the
  /// continuation block when the call returns null/zero.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Closes the conditional region opened by Enter().
  /// NOTE(review): ContBlock is still null if Enter() was not run in
  /// conditional mode; the callers visible here only call Done() on actions
  /// constructed with Conditional=true - confirm no other caller exists.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emits the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emits a 'critical' region: the body produced by \p CriticalOpGen is wrapped
/// in __kmpc_critical (or __kmpc_critical_with_hint when \p Hint is given) and
/// __kmpc_end_critical calls on the lock named after \p CriticalName.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments as the exit call plus, optionally,
  // the hint converted to i32.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emits a 'master' region: the body produced by \p MasterOpGen is executed
/// only when __kmpc_master returns non-zero and is followed by
/// __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emits a 'taskyield': through the OpenMPIRBuilder when that option is
/// enabled, otherwise as a direct call to __kmpc_omp_taskyield. Afterwards the
/// enclosing OpenMP region, if any, gets a chance to emit its untied-task
/// switch.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits a 'taskgroup' region: the body produced by \p TaskgroupOpGen is
/// wrapped in __kmpc_taskgroup / __kmpc_end_taskgroup calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \p Index of \p Array and returns it as an address with the
/// declared alignment of \p Var, cast to point at the memory type of \p Var.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Builds the internal helper "void copy_func(void *LHSArg, void *RHSArg)"
/// used for 'copyprivate'. Both arguments are treated as arrays of pointers
/// (\p ArgsType); for every index I the helper copies element I of the RHS
/// array into element I of the LHS array using AssignmentOps[I].
///
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emits a 'single' region, optionally followed by the 'copyprivate'
/// broadcast.
///
/// The body produced by \p SingleOpGen runs only when __kmpc_single returns
/// non-zero and is followed by __kmpc_end_single. When \p CopyprivateVars is
/// not empty, a "did_it" flag records whether this thread executed the body,
/// and __kmpc_copyprivate is called with the list of variable addresses, the
/// generated copy function and that flag. All four expression lists must have
/// the same length (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is what
  // enables the copyprivate code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed here in the positions the
    // helper names DestExprs/SrcExprs; presumably the callers of this function
    // pass them swapped as well - confirm before "fixing" the order.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emits an 'ordered' region. When \p IsThreads is true the body produced by
/// \p OrderedOpGen is wrapped in __kmpc_ordered / __kmpc_end_ordered calls;
/// otherwise the body is emitted inline without any runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Maps the directive \p Kind that requires a barrier to the matching
/// OMP_IDENT_BARRIER_* flag: dedicated values for 'for', 'sections', 'single'
/// and an explicit 'barrier', OMP_IDENT_BARRIER_IMPL for everything else.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Overrides the schedule for doacross loops: if \p S has an 'ordered' clause
/// with a non-zero number of loops, \p ScheduleKind is set to static and
/// \p ChunkExpr to an unsigned 32-bit literal 1. Both outputs are left
/// untouched otherwise.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value
*Result = CGF.EmitRuntimeCall( 2534 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2535 OMPRTL___kmpc_cancel_barrier), 2536 Args); 2537 if (EmitChecks) { 2538 // if (__kmpc_cancel_barrier()) { 2539 // exit from construct; 2540 // } 2541 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2542 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2543 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2544 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2545 CGF.EmitBlock(ExitBB); 2546 // exit from construct; 2547 CodeGenFunction::JumpDest CancelDestination = 2548 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2549 CGF.EmitBranchThroughCleanup(CancelDestination); 2550 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2551 } 2552 return; 2553 } 2554 } 2555 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2556 CGM.getModule(), OMPRTL___kmpc_barrier), 2557 Args); 2558 } 2559 2560 /// Map the OpenMP loop schedule to the runtime enumeration. 2561 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2562 bool Chunked, bool Ordered) { 2563 switch (ScheduleKind) { 2564 case OMPC_SCHEDULE_static: 2565 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2566 : (Ordered ? OMP_ord_static : OMP_sch_static); 2567 case OMPC_SCHEDULE_dynamic: 2568 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2569 case OMPC_SCHEDULE_guided: 2570 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2571 case OMPC_SCHEDULE_runtime: 2572 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2573 case OMPC_SCHEDULE_auto: 2574 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2575 case OMPC_SCHEDULE_unknown: 2576 assert(!Chunked && "chunk was specified but schedule kind not known"); 2577 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2578 } 2579 llvm_unreachable("Unexpected runtime schedule"); 2580 } 2581 2582 /// Map the OpenMP distribute schedule to the runtime enumeration. 2583 static OpenMPSchedType 2584 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2585 // only static is allowed for dist_schedule 2586 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2587 } 2588 2589 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2590 bool Chunked) const { 2591 OpenMPSchedType Schedule = 2592 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2593 return Schedule == OMP_sch_static; 2594 } 2595 2596 bool CGOpenMPRuntime::isStaticNonchunked( 2597 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2598 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2599 return Schedule == OMP_dist_sch_static; 2600 } 2601 2602 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2603 bool Chunked) const { 2604 OpenMPSchedType Schedule = 2605 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2606 return Schedule == OMP_sch_static_chunked; 2607 } 2608 2609 bool CGOpenMPRuntime::isStaticChunked( 2610 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2611 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2612 return Schedule == OMP_dist_sch_static_chunked; 2613 } 2614 2615 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2616 OpenMPSchedType Schedule = 2617 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2618 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2619 return Schedule != OMP_sch_static; 2620 } 2621 2622 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2623 OpenMPScheduleClauseModifier M1, 2624 OpenMPScheduleClauseModifier M2) { 2625 int Modifier = 0; 2626 switch (M1) { 2627 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2628 Modifier = OMP_sch_modifier_monotonic; 2629 break; 2630 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2631 Modifier = OMP_sch_modifier_nonmonotonic; 2632 break; 2633 case OMPC_SCHEDULE_MODIFIER_simd: 2634 if (Schedule == OMP_sch_static_chunked) 2635 Schedule = OMP_sch_static_balanced_chunked; 2636 break; 2637 case OMPC_SCHEDULE_MODIFIER_last: 2638 case OMPC_SCHEDULE_MODIFIER_unknown: 2639 break; 2640 } 2641 switch (M2) { 2642 case OMPC_SCHEDULE_MODIFIER_monotonic: 2643 Modifier = OMP_sch_modifier_monotonic; 2644 break; 2645 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2646 Modifier = OMP_sch_modifier_nonmonotonic; 2647 break; 2648 case OMPC_SCHEDULE_MODIFIER_simd: 2649 if (Schedule == OMP_sch_static_chunked) 2650 Schedule = OMP_sch_static_balanced_chunked; 2651 break; 2652 case OMPC_SCHEDULE_MODIFIER_last: 2653 case OMPC_SCHEDULE_MODIFIER_unknown: 2654 break; 2655 } 2656 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2657 // If the static schedule kind is specified or if the ordered clause is 2658 // specified, and if the nonmonotonic modifier is not specified, the effect is 2659 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2660 // modifier is specified, the effect is as if the nonmonotonic modifier is 2661 // specified. 
2662 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2663 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2664 Schedule == OMP_sch_static_balanced_chunked || 2665 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2666 Schedule == OMP_dist_sch_static_chunked || 2667 Schedule == OMP_dist_sch_static)) 2668 Modifier = OMP_sch_modifier_nonmonotonic; 2669 } 2670 return Schedule | Modifier; 2671 } 2672 2673 void CGOpenMPRuntime::emitForDispatchInit( 2674 CodeGenFunction &CGF, SourceLocation Loc, 2675 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2676 bool Ordered, const DispatchRTInput &DispatchValues) { 2677 if (!CGF.HaveInsertPoint()) 2678 return; 2679 OpenMPSchedType Schedule = getRuntimeSchedule( 2680 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2681 assert(Ordered || 2682 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2683 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2684 Schedule != OMP_sch_static_balanced_chunked)); 2685 // Call __kmpc_dispatch_init( 2686 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2687 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2688 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2689 2690 // If the Chunk was not specified in the clause - use default value 1. 2691 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2692 : CGF.Builder.getIntN(IVSize, 1); 2693 llvm::Value *Args[] = { 2694 emitUpdateLocation(CGF, Loc), 2695 getThreadID(CGF, Loc), 2696 CGF.Builder.getInt32(addMonoNonMonoModifier( 2697 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2698 DispatchValues.LB, // Lower 2699 DispatchValues.UB, // Upper 2700 CGF.Builder.getIntN(IVSize, 1), // Stride 2701 Chunk // Chunk 2702 }; 2703 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2704 } 2705 2706 static void emitForStaticInitCall( 2707 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2708 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2709 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2710 const CGOpenMPRuntime::StaticRTInput &Values) { 2711 if (!CGF.HaveInsertPoint()) 2712 return; 2713 2714 assert(!Values.Ordered); 2715 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2716 Schedule == OMP_sch_static_balanced_chunked || 2717 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2718 Schedule == OMP_dist_sch_static || 2719 Schedule == OMP_dist_sch_static_chunked); 2720 2721 // Call __kmpc_for_static_init( 2722 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2723 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2724 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2725 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2726 llvm::Value *Chunk = Values.Chunk; 2727 if (Chunk == nullptr) { 2728 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2729 Schedule == OMP_dist_sch_static) && 2730 "expected static non-chunked schedule"); 2731 // If the Chunk was not specified in the clause - use default value 1. 
2732 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2733 } else { 2734 assert((Schedule == OMP_sch_static_chunked || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static_chunked || 2737 Schedule == OMP_dist_sch_static_chunked) && 2738 "expected static chunked schedule"); 2739 } 2740 llvm::Value *Args[] = { 2741 UpdateLocation, 2742 ThreadId, 2743 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2744 M2)), // Schedule type 2745 Values.IL.getPointer(), // &isLastIter 2746 Values.LB.getPointer(), // &LB 2747 Values.UB.getPointer(), // &UB 2748 Values.ST.getPointer(), // &Stride 2749 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2750 Chunk // Chunk 2751 }; 2752 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2753 } 2754 2755 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2756 SourceLocation Loc, 2757 OpenMPDirectiveKind DKind, 2758 const OpenMPScheduleTy &ScheduleKind, 2759 const StaticRTInput &Values) { 2760 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2761 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2762 assert(isOpenMPWorksharingDirective(DKind) && 2763 "Expected loop-based or sections-based directive."); 2764 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2765 isOpenMPLoopDirective(DKind) 2766 ? 
OMP_IDENT_WORK_LOOP 2767 : OMP_IDENT_WORK_SECTIONS); 2768 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2769 llvm::FunctionCallee StaticInitFunction = 2770 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2771 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2772 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2773 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2774 } 2775 2776 void CGOpenMPRuntime::emitDistributeStaticInit( 2777 CodeGenFunction &CGF, SourceLocation Loc, 2778 OpenMPDistScheduleClauseKind SchedKind, 2779 const CGOpenMPRuntime::StaticRTInput &Values) { 2780 OpenMPSchedType ScheduleNum = 2781 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2782 llvm::Value *UpdatedLocation = 2783 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2784 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2785 llvm::FunctionCallee StaticInitFunction = 2786 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2787 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2788 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2789 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2790 } 2791 2792 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2793 SourceLocation Loc, 2794 OpenMPDirectiveKind DKind) { 2795 if (!CGF.HaveInsertPoint()) 2796 return; 2797 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2798 llvm::Value *Args[] = { 2799 emitUpdateLocation(CGF, Loc, 2800 isOpenMPDistributeDirective(DKind) 2801 ? OMP_IDENT_WORK_DISTRIBUTE 2802 : isOpenMPLoopDirective(DKind) 2803 ? 
OMP_IDENT_WORK_LOOP 2804 : OMP_IDENT_WORK_SECTIONS), 2805 getThreadID(CGF, Loc)}; 2806 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2807 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2808 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2809 Args); 2810 } 2811 2812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2813 SourceLocation Loc, 2814 unsigned IVSize, 2815 bool IVSigned) { 2816 if (!CGF.HaveInsertPoint()) 2817 return; 2818 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2819 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2820 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2821 } 2822 2823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2824 SourceLocation Loc, unsigned IVSize, 2825 bool IVSigned, Address IL, 2826 Address LB, Address UB, 2827 Address ST) { 2828 // Call __kmpc_dispatch_next( 2829 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2830 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2831 // kmp_int[32|64] *p_stride); 2832 llvm::Value *Args[] = { 2833 emitUpdateLocation(CGF, Loc), 2834 getThreadID(CGF, Loc), 2835 IL.getPointer(), // &isLastIter 2836 LB.getPointer(), // &Lower 2837 UB.getPointer(), // &Upper 2838 ST.getPointer() // &Stride 2839 }; 2840 llvm::Value *Call = 2841 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2842 return CGF.EmitScalarConversion( 2843 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2844 CGF.getContext().BoolTy, Loc); 2845 } 2846 2847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2848 llvm::Value *NumThreads, 2849 SourceLocation Loc) { 2850 if (!CGF.HaveInsertPoint()) 2851 return; 2852 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2853 llvm::Value *Args[] = { 2854 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2855 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2856 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2857 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2858 Args); 2859 } 2860 2861 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2862 ProcBindKind ProcBind, 2863 SourceLocation Loc) { 2864 if (!CGF.HaveInsertPoint()) 2865 return; 2866 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2867 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2868 llvm::Value *Args[] = { 2869 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2870 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2871 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2872 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2873 Args); 2874 } 2875 2876 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2877 SourceLocation Loc, llvm::AtomicOrdering AO) { 2878 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2879 OMPBuilder.CreateFlush(CGF.Builder); 2880 } else { 2881 if (!CGF.HaveInsertPoint()) 2882 return; 2883 // Build call void __kmpc_flush(ident_t *loc) 2884 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2885 CGM.getModule(), OMPRTL___kmpc_flush), 2886 emitUpdateLocation(CGF, Loc)); 2887 } 2888 } 2889 2890 namespace { 2891 /// Indexes of fields for type kmp_task_t. 2892 enum KmpTaskTFields { 2893 /// List of shared variables. 2894 KmpTaskTShareds, 2895 /// Task routine. 2896 KmpTaskTRoutine, 2897 /// Partition id for the untied tasks. 2898 KmpTaskTPartId, 2899 /// Function with call of destructors for private variables. 2900 Data1, 2901 /// Task priority. 2902 Data2, 2903 /// (Taskloops only) Lower bound. 2904 KmpTaskTLowerBound, 2905 /// (Taskloops only) Upper bound. 2906 KmpTaskTUpperBound, 2907 /// (Taskloops only) Stride. 2908 KmpTaskTStride, 2909 /// (Taskloops only) Is last iteration flag. 2910 KmpTaskTLastIter, 2911 /// (Taskloops only) Reduction data. 
2912 KmpTaskTReductions, 2913 }; 2914 } // anonymous namespace 2915 2916 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2917 return OffloadEntriesTargetRegion.empty() && 2918 OffloadEntriesDeviceGlobalVar.empty(); 2919 } 2920 2921 /// Initialize target region entry. 2922 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2923 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2924 StringRef ParentName, unsigned LineNum, 2925 unsigned Order) { 2926 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2927 "only required for the device " 2928 "code generation."); 2929 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2930 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2931 OMPTargetRegionEntryTargetRegion); 2932 ++OffloadingEntriesNum; 2933 } 2934 2935 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2936 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2937 StringRef ParentName, unsigned LineNum, 2938 llvm::Constant *Addr, llvm::Constant *ID, 2939 OMPTargetRegionEntryKind Flags) { 2940 // If we are emitting code for a target, the entry is already initialized, 2941 // only has to be registered. 
2942 if (CGM.getLangOpts().OpenMPIsDevice) { 2943 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 2944 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2945 DiagnosticsEngine::Error, 2946 "Unable to find target region on line '%0' in the device code."); 2947 CGM.getDiags().Report(DiagID) << LineNum; 2948 return; 2949 } 2950 auto &Entry = 2951 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2952 assert(Entry.isValid() && "Entry not initialized!"); 2953 Entry.setAddress(Addr); 2954 Entry.setID(ID); 2955 Entry.setFlags(Flags); 2956 } else { 2957 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2958 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2959 ++OffloadingEntriesNum; 2960 } 2961 } 2962 2963 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2964 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2965 unsigned LineNum) const { 2966 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2967 if (PerDevice == OffloadEntriesTargetRegion.end()) 2968 return false; 2969 auto PerFile = PerDevice->second.find(FileID); 2970 if (PerFile == PerDevice->second.end()) 2971 return false; 2972 auto PerParentName = PerFile->second.find(ParentName); 2973 if (PerParentName == PerFile->second.end()) 2974 return false; 2975 auto PerLine = PerParentName->second.find(LineNum); 2976 if (PerLine == PerParentName->second.end()) 2977 return false; 2978 // Fail if this entry is already registered. 2979 if (PerLine->second.getAddress() || PerLine->second.getID()) 2980 return false; 2981 return true; 2982 } 2983 2984 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2985 const OffloadTargetRegionEntryInfoActTy &Action) { 2986 // Scan all target region entries and perform the provided action. 
2987 for (const auto &D : OffloadEntriesTargetRegion) 2988 for (const auto &F : D.second) 2989 for (const auto &P : F.second) 2990 for (const auto &L : P.second) 2991 Action(D.first, F.first, P.first(), L.first, L.second); 2992 } 2993 2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2995 initializeDeviceGlobalVarEntryInfo(StringRef Name, 2996 OMPTargetGlobalVarEntryKind Flags, 2997 unsigned Order) { 2998 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2999 "only required for the device " 3000 "code generation."); 3001 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3002 ++OffloadingEntriesNum; 3003 } 3004 3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3006 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3007 CharUnits VarSize, 3008 OMPTargetGlobalVarEntryKind Flags, 3009 llvm::GlobalValue::LinkageTypes Linkage) { 3010 if (CGM.getLangOpts().OpenMPIsDevice) { 3011 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3012 assert(Entry.isValid() && Entry.getFlags() == Flags && 3013 "Entry not initialized!"); 3014 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3015 "Resetting with the new address."); 3016 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3017 if (Entry.getVarSize().isZero()) { 3018 Entry.setVarSize(VarSize); 3019 Entry.setLinkage(Linkage); 3020 } 3021 return; 3022 } 3023 Entry.setVarSize(VarSize); 3024 Entry.setLinkage(Linkage); 3025 Entry.setAddress(Addr); 3026 } else { 3027 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3028 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3029 assert(Entry.isValid() && Entry.getFlags() == Flags && 3030 "Entry not initialized!"); 3031 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3032 "Resetting with the new address."); 3033 if (Entry.getVarSize().isZero()) { 3034 Entry.setVarSize(VarSize); 3035 Entry.setLinkage(Linkage); 3036 } 3037 return; 3038 } 3039 
OffloadEntriesDeviceGlobalVar.try_emplace( 3040 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3041 ++OffloadingEntriesNum; 3042 } 3043 } 3044 3045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3046 actOnDeviceGlobalVarEntriesInfo( 3047 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3048 // Scan all target region entries and perform the provided action. 3049 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3050 Action(E.getKey(), E.getValue()); 3051 } 3052 3053 void CGOpenMPRuntime::createOffloadEntry( 3054 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3055 llvm::GlobalValue::LinkageTypes Linkage) { 3056 StringRef Name = Addr->getName(); 3057 llvm::Module &M = CGM.getModule(); 3058 llvm::LLVMContext &C = M.getContext(); 3059 3060 // Create constant string with the name. 3061 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3062 3063 std::string StringName = getName({"omp_offloading", "entry_name"}); 3064 auto *Str = new llvm::GlobalVariable( 3065 M, StrPtrInit->getType(), /*isConstant=*/true, 3066 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3067 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3068 3069 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3070 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3071 llvm::ConstantInt::get(CGM.SizeTy, Size), 3072 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3073 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3074 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3075 llvm::GlobalVariable *Entry = createGlobalStruct( 3076 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3077 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3078 3079 // The entry has to be created in the section the linker expects it to be. 
3080 Entry->setSection("omp_offloading_entries"); 3081 } 3082 3083 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3084 // Emit the offloading entries and metadata so that the device codegen side 3085 // can easily figure out what to emit. The produced metadata looks like 3086 // this: 3087 // 3088 // !omp_offload.info = !{!1, ...} 3089 // 3090 // Right now we only generate metadata for function that contain target 3091 // regions. 3092 3093 // If we are in simd mode or there are no entries, we don't need to do 3094 // anything. 3095 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3096 return; 3097 3098 llvm::Module &M = CGM.getModule(); 3099 llvm::LLVMContext &C = M.getContext(); 3100 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3101 SourceLocation, StringRef>, 3102 16> 3103 OrderedEntries(OffloadEntriesInfoManager.size()); 3104 llvm::SmallVector<StringRef, 16> ParentFunctions( 3105 OffloadEntriesInfoManager.size()); 3106 3107 // Auxiliary methods to create metadata values and strings. 3108 auto &&GetMDInt = [this](unsigned V) { 3109 return llvm::ConstantAsMetadata::get( 3110 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3111 }; 3112 3113 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3114 3115 // Create the offloading info metadata node. 3116 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3117 3118 // Create function that emits metadata for each target region entry; 3119 auto &&TargetRegionMetadataEmitter = 3120 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3121 &GetMDString]( 3122 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3123 unsigned Line, 3124 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3125 // Generate metadata for target regions. Each entry of this metadata 3126 // contains: 3127 // - Entry 0 -> Kind of this type of metadata (0). 
3128 // - Entry 1 -> Device ID of the file where the entry was identified. 3129 // - Entry 2 -> File ID of the file where the entry was identified. 3130 // - Entry 3 -> Mangled name of the function where the entry was 3131 // identified. 3132 // - Entry 4 -> Line in the file where the entry was identified. 3133 // - Entry 5 -> Order the entry was created. 3134 // The first element of the metadata node is the kind. 3135 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3136 GetMDInt(FileID), GetMDString(ParentName), 3137 GetMDInt(Line), GetMDInt(E.getOrder())}; 3138 3139 SourceLocation Loc; 3140 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3141 E = CGM.getContext().getSourceManager().fileinfo_end(); 3142 I != E; ++I) { 3143 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3144 I->getFirst()->getUniqueID().getFile() == FileID) { 3145 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3146 I->getFirst(), Line, 1); 3147 break; 3148 } 3149 } 3150 // Save this entry in the right position of the ordered entries array. 3151 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3152 ParentFunctions[E.getOrder()] = ParentName; 3153 3154 // Add metadata to the named metadata node. 3155 MD->addOperand(llvm::MDNode::get(C, Ops)); 3156 }; 3157 3158 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3159 TargetRegionMetadataEmitter); 3160 3161 // Create function that emits metadata for each device global variable entry; 3162 auto &&DeviceGlobalVarMetadataEmitter = 3163 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3164 MD](StringRef MangledName, 3165 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3166 &E) { 3167 // Generate metadata for global variables. Each entry of this metadata 3168 // contains: 3169 // - Entry 0 -> Kind of this type of metadata (1). 3170 // - Entry 1 -> Mangled name of the variable. 3171 // - Entry 2 -> Declare target kind. 
3172 // - Entry 3 -> Order the entry was created. 3173 // The first element of the metadata node is the kind. 3174 llvm::Metadata *Ops[] = { 3175 GetMDInt(E.getKind()), GetMDString(MangledName), 3176 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3177 3178 // Save this entry in the right position of the ordered entries array. 3179 OrderedEntries[E.getOrder()] = 3180 std::make_tuple(&E, SourceLocation(), MangledName); 3181 3182 // Add metadata to the named metadata node. 3183 MD->addOperand(llvm::MDNode::get(C, Ops)); 3184 }; 3185 3186 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3187 DeviceGlobalVarMetadataEmitter); 3188 3189 for (const auto &E : OrderedEntries) { 3190 assert(std::get<0>(E) && "All ordered entries must exist!"); 3191 if (const auto *CE = 3192 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3193 std::get<0>(E))) { 3194 if (!CE->getID() || !CE->getAddress()) { 3195 // Do not blame the entry if the parent funtion is not emitted. 3196 StringRef FnName = ParentFunctions[CE->getOrder()]; 3197 if (!CGM.GetGlobalValue(FnName)) 3198 continue; 3199 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3200 DiagnosticsEngine::Error, 3201 "Offloading entry for target region in %0 is incorrect: either the " 3202 "address or the ID is invalid."); 3203 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3204 continue; 3205 } 3206 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3207 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3208 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3209 OffloadEntryInfoDeviceGlobalVar>( 3210 std::get<0>(E))) { 3211 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3212 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3213 CE->getFlags()); 3214 switch (Flags) { 3215 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3216 if (CGM.getLangOpts().OpenMPIsDevice && 3217 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3218 continue; 3219 if (!CE->getAddress()) { 3220 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3221 DiagnosticsEngine::Error, "Offloading entry for declare target " 3222 "variable %0 is incorrect: the " 3223 "address is invalid."); 3224 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3225 continue; 3226 } 3227 // The vaiable has no definition - no need to add the entry. 3228 if (CE->getVarSize().isZero()) 3229 continue; 3230 break; 3231 } 3232 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3233 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3234 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3235 "Declaret target link address is set."); 3236 if (CGM.getLangOpts().OpenMPIsDevice) 3237 continue; 3238 if (!CE->getAddress()) { 3239 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3240 DiagnosticsEngine::Error, 3241 "Offloading entry for declare target variable is incorrect: the " 3242 "address is invalid."); 3243 CGM.getDiags().Report(DiagID); 3244 continue; 3245 } 3246 break; 3247 } 3248 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3249 CE->getVarSize().getQuantity(), Flags, 3250 CE->getLinkage()); 3251 } else { 3252 llvm_unreachable("Unsupported entry kind."); 3253 } 3254 } 3255 } 3256 3257 /// Loads all the offload entries information from the host IR 3258 /// metadata. 3259 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3260 // If we are in target mode, load the metadata from the host IR. This code has 3261 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3262 3263 if (!CGM.getLangOpts().OpenMPIsDevice) 3264 return; 3265 3266 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3267 return; 3268 3269 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3270 if (auto EC = Buf.getError()) { 3271 CGM.getDiags().Report(diag::err_cannot_open_file) 3272 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3273 return; 3274 } 3275 3276 llvm::LLVMContext C; 3277 auto ME = expectedToErrorOrAndEmitErrors( 3278 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3279 3280 if (auto EC = ME.getError()) { 3281 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3282 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3283 CGM.getDiags().Report(DiagID) 3284 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3285 return; 3286 } 3287 3288 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3289 if (!MD) 3290 return; 3291 3292 for (llvm::MDNode *MN : MD->operands()) { 3293 auto &&GetMDInt = [MN](unsigned Idx) { 3294 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3295 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3296 }; 3297 3298 auto &&GetMDString = [MN](unsigned Idx) { 3299 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3300 return V->getString(); 3301 }; 3302 3303 switch (GetMDInt(0)) { 3304 default: 3305 llvm_unreachable("Unexpected metadata!"); 3306 break; 3307 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3308 OffloadingEntryInfoTargetRegion: 3309 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3310 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3311 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3312 /*Order=*/GetMDInt(5)); 3313 break; 3314 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3315 OffloadingEntryInfoDeviceGlobalVar: 3316 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3317 /*MangledName=*/GetMDString(1), 3318 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3319 /*Flags=*/GetMDInt(2)), 3320 /*Order=*/GetMDInt(3)); 3321 break; 3322 } 3323 } 3324 } 3325 3326 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3327 if (!KmpRoutineEntryPtrTy) { 3328 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3329 ASTContext &C = CGM.getContext(); 3330 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3331 FunctionProtoType::ExtProtoInfo EPI; 3332 KmpRoutineEntryPtrQTy = C.getPointerType( 3333 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3334 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3335 } 3336 } 3337 3338 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3339 // Make sure the type of the entry is already created. This is the type we 3340 // have to create: 3341 // struct __tgt_offload_entry{ 3342 // void *addr; // Pointer to the offload entry info. 3343 // // (function or global) 3344 // char *name; // Name of the function or global. 3345 // size_t size; // Size of the entry info (0 if it a function). 3346 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3347 // int32_t reserved; // Reserved, to use by the runtime library. 
3348 // }; 3349 if (TgtOffloadEntryQTy.isNull()) { 3350 ASTContext &C = CGM.getContext(); 3351 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3352 RD->startDefinition(); 3353 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3354 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3355 addFieldToRecordDecl(C, RD, C.getSizeType()); 3356 addFieldToRecordDecl( 3357 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3358 addFieldToRecordDecl( 3359 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3360 RD->completeDefinition(); 3361 RD->addAttr(PackedAttr::CreateImplicit(C)); 3362 TgtOffloadEntryQTy = C.getRecordType(RD); 3363 } 3364 return TgtOffloadEntryQTy; 3365 } 3366 3367 namespace { 3368 struct PrivateHelpersTy { 3369 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3370 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3371 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3372 PrivateElemInit(PrivateElemInit) {} 3373 const Expr *OriginalRef = nullptr; 3374 const VarDecl *Original = nullptr; 3375 const VarDecl *PrivateCopy = nullptr; 3376 const VarDecl *PrivateElemInit = nullptr; 3377 }; 3378 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3379 } // anonymous namespace 3380 3381 static RecordDecl * 3382 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3383 if (!Privates.empty()) { 3384 ASTContext &C = CGM.getContext(); 3385 // Build struct .kmp_privates_t. 
{ 3386 // /* private vars */ 3387 // }; 3388 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3389 RD->startDefinition(); 3390 for (const auto &Pair : Privates) { 3391 const VarDecl *VD = Pair.second.Original; 3392 QualType Type = VD->getType().getNonReferenceType(); 3393 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3394 if (VD->hasAttrs()) { 3395 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3396 E(VD->getAttrs().end()); 3397 I != E; ++I) 3398 FD->addAttr(*I); 3399 } 3400 } 3401 RD->completeDefinition(); 3402 return RD; 3403 } 3404 return nullptr; 3405 } 3406 3407 static RecordDecl * 3408 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3409 QualType KmpInt32Ty, 3410 QualType KmpRoutineEntryPointerQTy) { 3411 ASTContext &C = CGM.getContext(); 3412 // Build struct kmp_task_t { 3413 // void * shareds; 3414 // kmp_routine_entry_t routine; 3415 // kmp_int32 part_id; 3416 // kmp_cmplrdata_t data1; 3417 // kmp_cmplrdata_t data2; 3418 // For taskloops additional fields: 3419 // kmp_uint64 lb; 3420 // kmp_uint64 ub; 3421 // kmp_int64 st; 3422 // kmp_int32 liter; 3423 // void * reductions; 3424 // }; 3425 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3426 UD->startDefinition(); 3427 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3428 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3429 UD->completeDefinition(); 3430 QualType KmpCmplrdataTy = C.getRecordType(UD); 3431 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3432 RD->startDefinition(); 3433 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3434 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3435 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3436 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3437 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3438 if (isOpenMPTaskLoopDirective(Kind)) { 3439 QualType KmpUInt64Ty = 3440 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3441 QualType KmpInt64Ty = 3442 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3443 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3444 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3445 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3446 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3447 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3448 } 3449 RD->completeDefinition(); 3450 return RD; 3451 } 3452 3453 static RecordDecl * 3454 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3455 ArrayRef<PrivateDataTy> Privates) { 3456 ASTContext &C = CGM.getContext(); 3457 // Build struct kmp_task_t_with_privates { 3458 // kmp_task_t task_data; 3459 // .kmp_privates_t. privates; 3460 // }; 3461 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3462 RD->startDefinition(); 3463 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3464 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3465 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3466 RD->completeDefinition(); 3467 return RD; 3468 } 3469 3470 /// Emit a proxy function which accepts kmp_task_t as the second 3471 /// argument. 
3472 /// \code 3473 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3474 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3475 /// For taskloops: 3476 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3477 /// tt->reductions, tt->shareds); 3478 /// return 0; 3479 /// } 3480 /// \endcode 3481 static llvm::Function * 3482 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3483 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3484 QualType KmpTaskTWithPrivatesPtrQTy, 3485 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3486 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3487 llvm::Value *TaskPrivatesMap) { 3488 ASTContext &C = CGM.getContext(); 3489 FunctionArgList Args; 3490 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3491 ImplicitParamDecl::Other); 3492 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3493 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3494 ImplicitParamDecl::Other); 3495 Args.push_back(&GtidArg); 3496 Args.push_back(&TaskTypeArg); 3497 const auto &TaskEntryFnInfo = 3498 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3499 llvm::FunctionType *TaskEntryTy = 3500 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3501 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3502 auto *TaskEntry = llvm::Function::Create( 3503 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3504 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3505 TaskEntry->setDoesNotRecurse(); 3506 CodeGenFunction CGF(CGM); 3507 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3508 Loc, Loc); 3509 3510 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3511 // tt, 3512 // For taskloops: 3513 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3514 // 
tt->task_data.shareds); 3515 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3516 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3517 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3518 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3519 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3520 const auto *KmpTaskTWithPrivatesQTyRD = 3521 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3522 LValue Base = 3523 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3524 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3525 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3526 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3527 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3528 3529 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3530 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3531 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3532 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3533 CGF.ConvertTypeForMem(SharedsPtrTy)); 3534 3535 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3536 llvm::Value *PrivatesParam; 3537 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3538 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3539 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3540 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3541 } else { 3542 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3543 } 3544 3545 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3546 TaskPrivatesMap, 3547 CGF.Builder 3548 .CreatePointerBitCastOrAddrSpaceCast( 3549 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3550 .getPointer()}; 3551 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3552 std::end(CommonArgs)); 3553 if (isOpenMPTaskLoopDirective(Kind)) { 3554 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3555 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3556 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3557 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3558 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3559 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3560 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3561 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3562 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3563 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3564 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3565 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3566 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3567 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3568 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3569 CallArgs.push_back(LBParam); 3570 CallArgs.push_back(UBParam); 3571 CallArgs.push_back(StParam); 3572 CallArgs.push_back(LIParam); 3573 CallArgs.push_back(RParam); 3574 } 3575 CallArgs.push_back(SharedsParam); 3576 3577 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3578 CallArgs); 3579 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3580 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3581 CGF.FinishFunction(); 3582 return TaskEntry; 3583 } 3584 3585 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3586 SourceLocation Loc, 3587 QualType KmpInt32Ty, 3588 QualType KmpTaskTWithPrivatesPtrQTy, 3589 QualType KmpTaskTWithPrivatesQTy) { 3590 ASTContext &C = CGM.getContext(); 3591 FunctionArgList Args; 3592 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3593 ImplicitParamDecl::Other); 3594 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3595 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3596 
ImplicitParamDecl::Other); 3597 Args.push_back(&GtidArg); 3598 Args.push_back(&TaskTypeArg); 3599 const auto &DestructorFnInfo = 3600 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3601 llvm::FunctionType *DestructorFnTy = 3602 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3603 std::string Name = 3604 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3605 auto *DestructorFn = 3606 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3607 Name, &CGM.getModule()); 3608 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3609 DestructorFnInfo); 3610 DestructorFn->setDoesNotRecurse(); 3611 CodeGenFunction CGF(CGM); 3612 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3613 Args, Loc, Loc); 3614 3615 LValue Base = CGF.EmitLoadOfPointerLValue( 3616 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3617 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3618 const auto *KmpTaskTWithPrivatesQTyRD = 3619 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3620 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3621 Base = CGF.EmitLValueForField(Base, *FI); 3622 for (const auto *Field : 3623 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3624 if (QualType::DestructionKind DtorKind = 3625 Field->getType().isDestructedType()) { 3626 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3627 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3628 } 3629 } 3630 CGF.FinishFunction(); 3631 return DestructorFn; 3632 } 3633 3634 /// Emit a privates mapping function for correct handling of private and 3635 /// firstprivate variables. 3636 /// \code 3637 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3638 /// **noalias priv1,..., <tyn> **noalias privn) { 3639 /// *priv1 = &.privates.priv1; 3640 /// ...; 3641 /// *privn = &.privates.privn; 3642 /// } 3643 /// \endcode 3644 static llvm::Value * 3645 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3646 ArrayRef<const Expr *> PrivateVars, 3647 ArrayRef<const Expr *> FirstprivateVars, 3648 ArrayRef<const Expr *> LastprivateVars, 3649 QualType PrivatesQTy, 3650 ArrayRef<PrivateDataTy> Privates) { 3651 ASTContext &C = CGM.getContext(); 3652 FunctionArgList Args; 3653 ImplicitParamDecl TaskPrivatesArg( 3654 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3655 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3656 ImplicitParamDecl::Other); 3657 Args.push_back(&TaskPrivatesArg); 3658 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3659 unsigned Counter = 1; 3660 for (const Expr *E : PrivateVars) { 3661 Args.push_back(ImplicitParamDecl::Create( 3662 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3663 C.getPointerType(C.getPointerType(E->getType())) 3664 .withConst() 3665 .withRestrict(), 3666 ImplicitParamDecl::Other)); 3667 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3668 PrivateVarsPos[VD] = Counter; 3669 ++Counter; 3670 } 3671 for (const Expr *E : FirstprivateVars) { 3672 Args.push_back(ImplicitParamDecl::Create( 3673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3674 C.getPointerType(C.getPointerType(E->getType())) 3675 .withConst() 3676 .withRestrict(), 3677 ImplicitParamDecl::Other)); 3678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3679 PrivateVarsPos[VD] = Counter; 3680 ++Counter; 3681 } 3682 for (const Expr *E : LastprivateVars) { 3683 Args.push_back(ImplicitParamDecl::Create( 3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3685 C.getPointerType(C.getPointerType(E->getType())) 3686 .withConst() 3687 .withRestrict(), 3688 ImplicitParamDecl::Other)); 3689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3690 
PrivateVarsPos[VD] = Counter; 3691 ++Counter; 3692 } 3693 const auto &TaskPrivatesMapFnInfo = 3694 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3695 llvm::FunctionType *TaskPrivatesMapTy = 3696 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3697 std::string Name = 3698 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3699 auto *TaskPrivatesMap = llvm::Function::Create( 3700 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3701 &CGM.getModule()); 3702 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3703 TaskPrivatesMapFnInfo); 3704 if (CGM.getLangOpts().Optimize) { 3705 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3706 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3707 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3708 } 3709 CodeGenFunction CGF(CGM); 3710 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3711 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3712 3713 // *privi = &.privates.privi; 3714 LValue Base = CGF.EmitLoadOfPointerLValue( 3715 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3716 TaskPrivatesArg.getType()->castAs<PointerType>()); 3717 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3718 Counter = 0; 3719 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3720 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3721 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3722 LValue RefLVal = 3723 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3724 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3725 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3726 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3727 ++Counter; 3728 } 3729 CGF.FinishFunction(); 3730 return TaskPrivatesMap; 3731 } 3732 3733 /// Emit initialization for private variables in task-based directives. 
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, reached through \p TDBase)
/// and emits the initializer of each private copy that has one.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when the
/// source lvalue for shared values is formed (see the condition below).
/// \param ForDup When true, only privates whose initializer is a non-trivial
/// CXXConstructExpr are initialized, and non-target firstprivates read their
/// original value through the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first points at the privates field of the wrapper record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Select the captured statement of the task or taskloop region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup case only non-trivial constructor calls are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is only set for firstprivates; those are initialized
      // from the original (shared) value.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds block, re-wrapped with the
          // declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted for a block: emit the
          // original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map the init helper variable to the
          // original's address, then emit the initializer into the copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not read the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next privates field even when nothing was emitted.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// \return true if at least one private copy is initialized by a non-trivial
/// CXXConstructExpr.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    // One qualifying private is enough.
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->liter = lastpriv;
/// // could be constructor calls here...
3874 /// } 3875 /// \endcode 3876 static llvm::Value * 3877 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3878 const OMPExecutableDirective &D, 3879 QualType KmpTaskTWithPrivatesPtrQTy, 3880 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3881 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3882 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3883 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3884 ASTContext &C = CGM.getContext(); 3885 FunctionArgList Args; 3886 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3887 KmpTaskTWithPrivatesPtrQTy, 3888 ImplicitParamDecl::Other); 3889 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3890 KmpTaskTWithPrivatesPtrQTy, 3891 ImplicitParamDecl::Other); 3892 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3893 ImplicitParamDecl::Other); 3894 Args.push_back(&DstArg); 3895 Args.push_back(&SrcArg); 3896 Args.push_back(&LastprivArg); 3897 const auto &TaskDupFnInfo = 3898 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3899 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3900 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3901 auto *TaskDup = llvm::Function::Create( 3902 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3903 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3904 TaskDup->setDoesNotRecurse(); 3905 CodeGenFunction CGF(CGM); 3906 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3907 Loc); 3908 3909 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3910 CGF.GetAddrOfLocalVar(&DstArg), 3911 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3912 // task_dst->liter = lastpriv; 3913 if (WithLastIter) { 3914 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3915 LValue Base = CGF.EmitLValueForField( 3916 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3917 LValue LILVal 
= CGF.EmitLValueForField(Base, *LIFI); 3918 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3919 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3920 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3921 } 3922 3923 // Emit initial values for private copies (if any). 3924 assert(!Privates.empty()); 3925 Address KmpTaskSharedsPtr = Address::invalid(); 3926 if (!Data.FirstprivateVars.empty()) { 3927 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3928 CGF.GetAddrOfLocalVar(&SrcArg), 3929 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3930 LValue Base = CGF.EmitLValueForField( 3931 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3932 KmpTaskSharedsPtr = Address( 3933 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3934 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3935 KmpTaskTShareds)), 3936 Loc), 3937 CGM.getNaturalTypeAlignment(SharedsTy)); 3938 } 3939 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3940 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3941 CGF.FinishFunction(); 3942 return TaskDup; 3943 } 3944 3945 /// Checks if destructor function is required to be generated. 3946 /// \return true if cleanups are required, false otherwise. 3947 static bool 3948 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 3949 bool NeedsCleanup = false; 3950 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3951 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 3952 for (const FieldDecl *FD : PrivateRD->fields()) { 3953 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 3954 if (NeedsCleanup) 3955 break; 3956 } 3957 return NeedsCleanup; 3958 } 3959 3960 namespace { 3961 /// Loop generator for OpenMP iterator expression. 
/// The constructor privatizes every iterator variable and its counter and
/// opens one loop per iterator (outermost first); the destructor emits the
/// counter increment, back-branch and exit block of each loop in reverse
/// order. A null iterator expression makes both a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations of the "iter.cont" and "iter.exit" blocks,
  // filled by the constructor and consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds and register temporaries for the iterator
    // variables and their counters before anything is privatized.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loops: each iteration of this C++ loop emits the header of one
    // generated loop, nested inside the previous one.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison is signed or unsigned according to the counter type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost first (I runs from the iterator count down
    // to 1).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size in bytes of the storage denoted by \p E.
/// For an array shaping expression the size is the pointee size multiplied by
/// every dimension; for an array section it is the distance from the start
/// address to one element past the section's last element; otherwise it is
/// the size of the expression's type.
/// \return The pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // The base of a shaping expression is a pointer value; use it as is.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Convert each dimension to size_t before multiplying.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = (address of last element + 1 element) - start address.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and stores it in \p KmpTaskAffinityInfoTy. The record has three fields: an
/// intptr-sized integer, a size_t and a 32-bit unsigned flags word.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
4097 const auto *I = Data.PrivateCopies.begin(); 4098 for (const Expr *E : Data.PrivateVars) { 4099 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4100 Privates.emplace_back( 4101 C.getDeclAlign(VD), 4102 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4103 /*PrivateElemInit=*/nullptr)); 4104 ++I; 4105 } 4106 I = Data.FirstprivateCopies.begin(); 4107 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4108 for (const Expr *E : Data.FirstprivateVars) { 4109 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4110 Privates.emplace_back( 4111 C.getDeclAlign(VD), 4112 PrivateHelpersTy( 4113 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4114 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4115 ++I; 4116 ++IElemInitRef; 4117 } 4118 I = Data.LastprivateCopies.begin(); 4119 for (const Expr *E : Data.LastprivateVars) { 4120 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4121 Privates.emplace_back( 4122 C.getDeclAlign(VD), 4123 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4124 /*PrivateElemInit=*/nullptr)); 4125 ++I; 4126 } 4127 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4128 return L.first > R.first; 4129 }); 4130 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4131 // Build type kmp_routine_entry_t (if not built yet). 4132 emitKmpRoutineEntryT(KmpInt32Ty); 4133 // Build type kmp_task_t (if not built yet). 
4134 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4135 if (SavedKmpTaskloopTQTy.isNull()) { 4136 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4137 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4138 } 4139 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4140 } else { 4141 assert((D.getDirectiveKind() == OMPD_task || 4142 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4143 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4144 "Expected taskloop, task or target directive"); 4145 if (SavedKmpTaskTQTy.isNull()) { 4146 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4147 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4148 } 4149 KmpTaskTQTy = SavedKmpTaskTQTy; 4150 } 4151 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4152 // Build particular struct kmp_task_t for the given task. 4153 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4154 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4155 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4156 QualType KmpTaskTWithPrivatesPtrQTy = 4157 C.getPointerType(KmpTaskTWithPrivatesQTy); 4158 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4159 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4160 KmpTaskTWithPrivatesTy->getPointerTo(); 4161 llvm::Value *KmpTaskTWithPrivatesTySize = 4162 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4163 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4164 4165 // Emit initial values for private copies (if any). 
4166 llvm::Value *TaskPrivatesMap = nullptr; 4167 llvm::Type *TaskPrivatesMapTy = 4168 std::next(TaskFunction->arg_begin(), 3)->getType(); 4169 if (!Privates.empty()) { 4170 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4171 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4172 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4173 FI->getType(), Privates); 4174 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4175 TaskPrivatesMap, TaskPrivatesMapTy); 4176 } else { 4177 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4178 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4179 } 4180 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4181 // kmp_task_t *tt); 4182 llvm::Function *TaskEntry = emitProxyTaskFunction( 4183 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4184 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4185 TaskPrivatesMap); 4186 4187 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4188 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4189 // kmp_routine_entry_t *task_entry); 4190 // Task flags. Format is taken from 4191 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4192 // description of kmp_tasking_flags struct. 4193 enum { 4194 TiedFlag = 0x1, 4195 FinalFlag = 0x2, 4196 DestructorsFlag = 0x8, 4197 PriorityFlag = 0x20, 4198 DetachableFlag = 0x40, 4199 }; 4200 unsigned Flags = Data.Tied ? TiedFlag : 0; 4201 bool NeedsCleanup = false; 4202 if (!Privates.empty()) { 4203 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4204 if (NeedsCleanup) 4205 Flags = Flags | DestructorsFlag; 4206 } 4207 if (Data.Priority.getInt()) 4208 Flags = Flags | PriorityFlag; 4209 if (D.hasClausesOfKind<OMPDetachClause>()) 4210 Flags = Flags | DetachableFlag; 4211 llvm::Value *TaskFlags = 4212 Data.Final.getPointer() 4213 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4214 CGF.Builder.getInt32(FinalFlag), 4215 CGF.Builder.getInt32(/*C=*/0)) 4216 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4217 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4218 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4219 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4220 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4221 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4222 TaskEntry, KmpRoutineEntryPtrTy)}; 4223 llvm::Value *NewTask; 4224 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4225 // Check if we have any device clause associated with the directive. 4226 const Expr *Device = nullptr; 4227 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4228 Device = C->getDevice(); 4229 // Emit device ID if any otherwise use default value. 4230 llvm::Value *DeviceID; 4231 if (Device) 4232 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4233 CGF.Int64Ty, /*isSigned=*/true); 4234 else 4235 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4236 AllocArgs.push_back(DeviceID); 4237 NewTask = CGF.EmitRuntimeCall( 4238 OMPBuilder.getOrCreateRuntimeFunction( 4239 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4240 AllocArgs); 4241 } else { 4242 NewTask = 4243 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4244 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4245 AllocArgs); 4246 } 4247 // Emit detach clause initialization. 
4248 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4249 // task_descriptor); 4250 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4251 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4252 LValue EvtLVal = CGF.EmitLValue(Evt); 4253 4254 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4255 // int gtid, kmp_task_t *task); 4256 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4257 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4258 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4259 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4260 OMPBuilder.getOrCreateRuntimeFunction( 4261 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4262 {Loc, Tid, NewTask}); 4263 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4264 Evt->getExprLoc()); 4265 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4266 } 4267 // Process affinity clauses. 4268 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4269 // Process list of affinity data. 4270 ASTContext &C = CGM.getContext(); 4271 Address AffinitiesArray = Address::invalid(); 4272 // Calculate number of elements to form the array of affinity data. 4273 llvm::Value *NumOfElements = nullptr; 4274 unsigned NumAffinities = 0; 4275 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4276 if (const Expr *Modifier = C->getModifier()) { 4277 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4278 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4279 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4280 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4281 NumOfElements = 4282 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4283 } 4284 } else { 4285 NumAffinities += C->varlist_size(); 4286 } 4287 } 4288 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4289 // Fields ids in kmp_task_affinity_info record. 
4290 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4291 4292 QualType KmpTaskAffinityInfoArrayTy; 4293 if (NumOfElements) { 4294 NumOfElements = CGF.Builder.CreateNUWAdd( 4295 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4296 OpaqueValueExpr OVE( 4297 Loc, 4298 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4299 VK_RValue); 4300 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4301 RValue::get(NumOfElements)); 4302 KmpTaskAffinityInfoArrayTy = 4303 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4304 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4305 // Properly emit variable-sized array. 4306 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4307 ImplicitParamDecl::Other); 4308 CGF.EmitVarDecl(*PD); 4309 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4310 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4311 /*isSigned=*/false); 4312 } else { 4313 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4314 KmpTaskAffinityInfoTy, 4315 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4316 ArrayType::Normal, /*IndexTypeQuals=*/0); 4317 AffinitiesArray = 4318 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4319 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4320 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4321 /*isSigned=*/false); 4322 } 4323 4324 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4325 // Fill array by elements without iterators. 
4326 unsigned Pos = 0; 4327 bool HasIterator = false; 4328 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4329 if (C->getModifier()) { 4330 HasIterator = true; 4331 continue; 4332 } 4333 for (const Expr *E : C->varlists()) { 4334 llvm::Value *Addr; 4335 llvm::Value *Size; 4336 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4337 LValue Base = 4338 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4339 KmpTaskAffinityInfoTy); 4340 // affs[i].base_addr = &<Affinities[i].second>; 4341 LValue BaseAddrLVal = CGF.EmitLValueForField( 4342 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4343 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4344 BaseAddrLVal); 4345 // affs[i].len = sizeof(<Affinities[i].second>); 4346 LValue LenLVal = CGF.EmitLValueForField( 4347 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4348 CGF.EmitStoreOfScalar(Size, LenLVal); 4349 ++Pos; 4350 } 4351 } 4352 LValue PosLVal; 4353 if (HasIterator) { 4354 PosLVal = CGF.MakeAddrLValue( 4355 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4356 C.getSizeType()); 4357 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4358 } 4359 // Process elements with iterators. 
4360 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4361 const Expr *Modifier = C->getModifier(); 4362 if (!Modifier) 4363 continue; 4364 OMPIteratorGeneratorScope IteratorScope( 4365 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4366 for (const Expr *E : C->varlists()) { 4367 llvm::Value *Addr; 4368 llvm::Value *Size; 4369 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4370 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4371 LValue Base = CGF.MakeAddrLValue( 4372 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4373 AffinitiesArray.getAlignment()), 4374 KmpTaskAffinityInfoTy); 4375 // affs[i].base_addr = &<Affinities[i].second>; 4376 LValue BaseAddrLVal = CGF.EmitLValueForField( 4377 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4378 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4379 BaseAddrLVal); 4380 // affs[i].len = sizeof(<Affinities[i].second>); 4381 LValue LenLVal = CGF.EmitLValueForField( 4382 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4383 CGF.EmitStoreOfScalar(Size, LenLVal); 4384 Idx = CGF.Builder.CreateNUWAdd( 4385 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4386 CGF.EmitStoreOfScalar(Idx, PosLVal); 4387 } 4388 } 4389 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4390 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4391 // naffins, kmp_task_affinity_info_t *affin_list); 4392 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4393 llvm::Value *GTid = getThreadID(CGF, Loc); 4394 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4395 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4396 // FIXME: Emit the function and ignore its result for now unless the 4397 // runtime function is properly implemented. 
4398 (void)CGF.EmitRuntimeCall( 4399 OMPBuilder.getOrCreateRuntimeFunction( 4400 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4401 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4402 } 4403 llvm::Value *NewTaskNewTaskTTy = 4404 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4405 NewTask, KmpTaskTWithPrivatesPtrTy); 4406 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4407 KmpTaskTWithPrivatesQTy); 4408 LValue TDBase = 4409 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4410 // Fill the data in the resulting kmp_task_t record. 4411 // Copy shareds if there are any. 4412 Address KmpTaskSharedsPtr = Address::invalid(); 4413 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4414 KmpTaskSharedsPtr = 4415 Address(CGF.EmitLoadOfScalar( 4416 CGF.EmitLValueForField( 4417 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4418 KmpTaskTShareds)), 4419 Loc), 4420 CGM.getNaturalTypeAlignment(SharedsTy)); 4421 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4422 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4423 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4424 } 4425 // Emit initial values for private copies (if any). 4426 TaskResultTy Result; 4427 if (!Privates.empty()) { 4428 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4429 SharedsTy, SharedsPtrTy, Data, Privates, 4430 /*ForDup=*/false); 4431 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4432 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4433 Result.TaskDupFn = emitTaskDupFunction( 4434 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4435 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4436 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4437 } 4438 } 4439 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4440 enum { Priority = 0, Destructors = 1 }; 4441 // Provide pointer to function with destructors for privates. 4442 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4443 const RecordDecl *KmpCmplrdataUD = 4444 (*FI)->getType()->getAsUnionType()->getDecl(); 4445 if (NeedsCleanup) { 4446 llvm::Value *DestructorFn = emitDestructorsFunction( 4447 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4448 KmpTaskTWithPrivatesQTy); 4449 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4450 LValue DestructorsLV = CGF.EmitLValueForField( 4451 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4452 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4453 DestructorFn, KmpRoutineEntryPtrTy), 4454 DestructorsLV); 4455 } 4456 // Set priority. 4457 if (Data.Priority.getInt()) { 4458 LValue Data2LV = CGF.EmitLValueForField( 4459 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4460 LValue PriorityLV = CGF.EmitLValueForField( 4461 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4462 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4463 } 4464 Result.NewTask = NewTask; 4465 Result.TaskEntry = TaskEntry; 4466 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4467 Result.TDBase = TDBase; 4468 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4469 return Result; 4470 } 4471 4472 namespace { 4473 /// Dependence kind for RTL. 4474 enum RTLDependenceKindTy { 4475 DepIn = 0x01, 4476 DepInOut = 0x3, 4477 DepMutexInOutSet = 0x4 4478 }; 4479 /// Fields ids in kmp_depend_info record. 4480 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4481 } // namespace 4482 4483 /// Translates internal dependency kind into the runtime kind. 4484 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4485 RTLDependenceKindTy DepKind; 4486 switch (K) { 4487 case OMPC_DEPEND_in: 4488 DepKind = DepIn; 4489 break; 4490 // Out and InOut dependencies must use the same code. 
4491 case OMPC_DEPEND_out: 4492 case OMPC_DEPEND_inout: 4493 DepKind = DepInOut; 4494 break; 4495 case OMPC_DEPEND_mutexinoutset: 4496 DepKind = DepMutexInOutSet; 4497 break; 4498 case OMPC_DEPEND_source: 4499 case OMPC_DEPEND_sink: 4500 case OMPC_DEPEND_depobj: 4501 case OMPC_DEPEND_unknown: 4502 llvm_unreachable("Unknown task dependence type"); 4503 } 4504 return DepKind; 4505 } 4506 4507 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4508 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4509 QualType &FlagsTy) { 4510 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4511 if (KmpDependInfoTy.isNull()) { 4512 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4513 KmpDependInfoRD->startDefinition(); 4514 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4515 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4516 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4517 KmpDependInfoRD->completeDefinition(); 4518 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4519 } 4520 } 4521 4522 std::pair<llvm::Value *, LValue> 4523 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4524 SourceLocation Loc) { 4525 ASTContext &C = CGM.getContext(); 4526 QualType FlagsTy; 4527 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4528 RecordDecl *KmpDependInfoRD = 4529 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4530 LValue Base = CGF.EmitLoadOfPointerLValue( 4531 DepobjLVal.getAddress(CGF), 4532 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4533 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4534 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4535 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4536 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4537 Base.getTBAAInfo()); 4538 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4539 Addr.getPointer(), 4540 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4541 LValue NumDepsBase = CGF.MakeAddrLValue( 4542 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4543 Base.getBaseInfo(), Base.getTBAAInfo()); 4544 // NumDeps = deps[i].base_addr; 4545 LValue BaseAddrLVal = CGF.EmitLValueForField( 4546 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4547 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4548 return std::make_pair(NumDeps, Base); 4549 } 4550 4551 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4552 llvm::PointerUnion<unsigned *, LValue *> Pos, 4553 const OMPTaskDataTy::DependData &Data, 4554 Address DependenciesArray) { 4555 CodeGenModule &CGM = CGF.CGM; 4556 ASTContext &C = CGM.getContext(); 4557 QualType FlagsTy; 4558 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4559 RecordDecl *KmpDependInfoRD = 4560 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4561 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4562 4563 OMPIteratorGeneratorScope IteratorScope( 4564 CGF, cast_or_null<OMPIteratorExpr>( 4565 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4566 : nullptr)); 4567 for (const Expr *E : Data.DepExprs) { 4568 llvm::Value *Addr; 4569 llvm::Value *Size; 4570 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4571 LValue Base; 4572 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4573 Base = CGF.MakeAddrLValue( 4574 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4575 } else { 4576 LValue &PosLVal = *Pos.get<LValue *>(); 4577 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4578 Base = CGF.MakeAddrLValue( 4579 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4580 DependenciesArray.getAlignment()), 4581 KmpDependInfoTy); 4582 } 4583 // deps[i].base_addr = &<Dependencies[i].second>; 4584 LValue BaseAddrLVal = CGF.EmitLValueForField( 4585 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4586 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4587 BaseAddrLVal); 4588 // deps[i].len = sizeof(<Dependencies[i].second>); 4589 LValue LenLVal = CGF.EmitLValueForField( 4590 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4591 CGF.EmitStoreOfScalar(Size, LenLVal); 4592 // deps[i].flags = <Dependencies[i].first>; 4593 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4594 LValue FlagsLVal = CGF.EmitLValueForField( 4595 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4596 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4597 FlagsLVal); 4598 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4599 ++(*P); 4600 } else { 4601 LValue &PosLVal = *Pos.get<LValue *>(); 4602 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4603 Idx = CGF.Builder.CreateNUWAdd(Idx, 4604 llvm::ConstantInt::get(Idx->getType(), 1)); 4605 CGF.EmitStoreOfScalar(Idx, PosLVal); 4606 } 4607 } 4608 } 4609 4610 static SmallVector<llvm::Value *, 4> 4611 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4612 const OMPTaskDataTy::DependData 
&Data) { 4613 assert(Data.DepKind == OMPC_DEPEND_depobj && 4614 "Expected depobj dependecy kind."); 4615 SmallVector<llvm::Value *, 4> Sizes; 4616 SmallVector<LValue, 4> SizeLVals; 4617 ASTContext &C = CGF.getContext(); 4618 QualType FlagsTy; 4619 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4620 RecordDecl *KmpDependInfoRD = 4621 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4622 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4623 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4624 { 4625 OMPIteratorGeneratorScope IteratorScope( 4626 CGF, cast_or_null<OMPIteratorExpr>( 4627 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4628 : nullptr)); 4629 for (const Expr *E : Data.DepExprs) { 4630 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4631 LValue Base = CGF.EmitLoadOfPointerLValue( 4632 DepobjLVal.getAddress(CGF), 4633 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4634 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4635 Base.getAddress(CGF), KmpDependInfoPtrT); 4636 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4637 Base.getTBAAInfo()); 4638 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4639 Addr.getPointer(), 4640 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4641 LValue NumDepsBase = CGF.MakeAddrLValue( 4642 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4643 Base.getBaseInfo(), Base.getTBAAInfo()); 4644 // NumDeps = deps[i].base_addr; 4645 LValue BaseAddrLVal = CGF.EmitLValueForField( 4646 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4647 llvm::Value *NumDeps = 4648 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4649 LValue NumLVal = CGF.MakeAddrLValue( 4650 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4651 C.getUIntPtrType()); 4652 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4653 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4654 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4655 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4656 CGF.EmitStoreOfScalar(Add, NumLVal); 4657 SizeLVals.push_back(NumLVal); 4658 } 4659 } 4660 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4661 llvm::Value *Size = 4662 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4663 Sizes.push_back(Size); 4664 } 4665 return Sizes; 4666 } 4667 4668 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4669 LValue PosLVal, 4670 const OMPTaskDataTy::DependData &Data, 4671 Address DependenciesArray) { 4672 assert(Data.DepKind == OMPC_DEPEND_depobj && 4673 "Expected depobj dependecy kind."); 4674 ASTContext &C = CGF.getContext(); 4675 QualType FlagsTy; 4676 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4677 RecordDecl *KmpDependInfoRD = 4678 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4679 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4680 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4681 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4682 { 4683 OMPIteratorGeneratorScope IteratorScope( 4684 CGF, cast_or_null<OMPIteratorExpr>( 4685 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4686 : nullptr)); 4687 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4688 const Expr *E = Data.DepExprs[I]; 4689 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4690 LValue Base = CGF.EmitLoadOfPointerLValue( 4691 DepobjLVal.getAddress(CGF), 4692 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4693 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4694 Base.getAddress(CGF), KmpDependInfoPtrT); 4695 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4696 Base.getTBAAInfo()); 4697 4698 // Get number of elements in a single depobj. 
4699 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4700 Addr.getPointer(), 4701 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4702 LValue NumDepsBase = CGF.MakeAddrLValue( 4703 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4704 Base.getBaseInfo(), Base.getTBAAInfo()); 4705 // NumDeps = deps[i].base_addr; 4706 LValue BaseAddrLVal = CGF.EmitLValueForField( 4707 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4708 llvm::Value *NumDeps = 4709 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4710 4711 // memcopy dependency data. 4712 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4713 ElSize, 4714 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4715 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4716 Address DepAddr = 4717 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4718 DependenciesArray.getAlignment()); 4719 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4720 4721 // Increase pos. 4722 // pos += size; 4723 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4724 CGF.EmitStoreOfScalar(Add, PosLVal); 4725 } 4726 } 4727 } 4728 4729 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4730 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4731 SourceLocation Loc) { 4732 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4733 return D.DepExprs.empty(); 4734 })) 4735 return std::make_pair(nullptr, Address::invalid()); 4736 // Process list of dependencies. 4737 ASTContext &C = CGM.getContext(); 4738 Address DependenciesArray = Address::invalid(); 4739 llvm::Value *NumOfElements = nullptr; 4740 unsigned NumDependencies = std::accumulate( 4741 Dependencies.begin(), Dependencies.end(), 0, 4742 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4743 return D.DepKind == OMPC_DEPEND_depobj 4744 ? V 4745 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4746 }); 4747 QualType FlagsTy; 4748 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4749 bool HasDepobjDeps = false; 4750 bool HasRegularWithIterators = false; 4751 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4752 llvm::Value *NumOfRegularWithIterators = 4753 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4754 // Calculate number of depobj dependecies and regular deps with the iterators. 4755 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4756 if (D.DepKind == OMPC_DEPEND_depobj) { 4757 SmallVector<llvm::Value *, 4> Sizes = 4758 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4759 for (llvm::Value *Size : Sizes) { 4760 NumOfDepobjElements = 4761 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4762 } 4763 HasDepobjDeps = true; 4764 continue; 4765 } 4766 // Include number of iterations, if any. 4767 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4768 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4769 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4770 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4771 NumOfRegularWithIterators = 4772 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4773 } 4774 HasRegularWithIterators = true; 4775 continue; 4776 } 4777 } 4778 4779 QualType KmpDependInfoArrayTy; 4780 if (HasDepobjDeps || HasRegularWithIterators) { 4781 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4782 /*isSigned=*/false); 4783 if (HasDepobjDeps) { 4784 NumOfElements = 4785 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4786 } 4787 if (HasRegularWithIterators) { 4788 NumOfElements = 4789 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4790 } 4791 OpaqueValueExpr OVE(Loc, 4792 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4793 VK_RValue); 4794 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4795 RValue::get(NumOfElements)); 4796 
KmpDependInfoArrayTy = 4797 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4798 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4799 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4800 // Properly emit variable-sized array. 4801 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4802 ImplicitParamDecl::Other); 4803 CGF.EmitVarDecl(*PD); 4804 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4805 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4806 /*isSigned=*/false); 4807 } else { 4808 KmpDependInfoArrayTy = C.getConstantArrayType( 4809 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4810 ArrayType::Normal, /*IndexTypeQuals=*/0); 4811 DependenciesArray = 4812 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4813 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4814 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4815 /*isSigned=*/false); 4816 } 4817 unsigned Pos = 0; 4818 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4819 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4820 Dependencies[I].IteratorExpr) 4821 continue; 4822 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4823 DependenciesArray); 4824 } 4825 // Copy regular dependecies with iterators. 4826 LValue PosLVal = CGF.MakeAddrLValue( 4827 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4828 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4829 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4830 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4831 !Dependencies[I].IteratorExpr) 4832 continue; 4833 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4834 DependenciesArray); 4835 } 4836 // Copy final depobj arrays without iterators. 
4837 if (HasDepobjDeps) { 4838 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4839 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4840 continue; 4841 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4842 DependenciesArray); 4843 } 4844 } 4845 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4846 DependenciesArray, CGF.VoidPtrTy); 4847 return std::make_pair(NumOfElements, DependenciesArray); 4848 } 4849 4850 Address CGOpenMPRuntime::emitDepobjDependClause( 4851 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4852 SourceLocation Loc) { 4853 if (Dependencies.DepExprs.empty()) 4854 return Address::invalid(); 4855 // Process list of dependencies. 4856 ASTContext &C = CGM.getContext(); 4857 Address DependenciesArray = Address::invalid(); 4858 unsigned NumDependencies = Dependencies.DepExprs.size(); 4859 QualType FlagsTy; 4860 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4861 RecordDecl *KmpDependInfoRD = 4862 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4863 4864 llvm::Value *Size; 4865 // Define type kmp_depend_info[<Dependencies.size()>]; 4866 // For depobj reserve one extra element to store the number of elements. 4867 // It is required to handle depobj(x) update(in) construct. 
4868 // kmp_depend_info[<Dependencies.size()>] deps; 4869 llvm::Value *NumDepsVal; 4870 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4871 if (const auto *IE = 4872 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4873 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4874 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4875 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4876 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4877 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4878 } 4879 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4880 NumDepsVal); 4881 CharUnits SizeInBytes = 4882 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4883 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4884 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4885 NumDepsVal = 4886 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4887 } else { 4888 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4889 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4890 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4891 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4892 Size = CGM.getSize(Sz.alignTo(Align)); 4893 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4894 } 4895 // Need to allocate on the dynamic memory. 4896 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4897 // Use default allocator. 
4898 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4899 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4900 4901 llvm::Value *Addr = 4902 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4903 CGM.getModule(), OMPRTL___kmpc_alloc), 4904 Args, ".dep.arr.addr"); 4905 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4906 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4907 DependenciesArray = Address(Addr, Align); 4908 // Write number of elements in the first element of array for depobj. 4909 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4910 // deps[i].base_addr = NumDependencies; 4911 LValue BaseAddrLVal = CGF.EmitLValueForField( 4912 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4913 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4914 llvm::PointerUnion<unsigned *, LValue *> Pos; 4915 unsigned Idx = 1; 4916 LValue PosLVal; 4917 if (Dependencies.IteratorExpr) { 4918 PosLVal = CGF.MakeAddrLValue( 4919 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4920 C.getSizeType()); 4921 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4922 /*IsInit=*/true); 4923 Pos = &PosLVal; 4924 } else { 4925 Pos = &Idx; 4926 } 4927 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4928 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4929 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4930 return DependenciesArray; 4931 } 4932 4933 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4934 SourceLocation Loc) { 4935 ASTContext &C = CGM.getContext(); 4936 QualType FlagsTy; 4937 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4938 LValue Base = CGF.EmitLoadOfPointerLValue( 4939 DepobjLVal.getAddress(CGF), 4940 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4941 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4942 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4943 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4944 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4945 Addr.getPointer(), 4946 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4947 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 4948 CGF.VoidPtrTy); 4949 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4950 // Use default allocator. 4951 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4952 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 4953 4954 // _kmpc_free(gtid, addr, nullptr); 4955 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4956 CGM.getModule(), OMPRTL___kmpc_free), 4957 Args); 4958 } 4959 4960 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 4961 OpenMPDependClauseKind NewDepKind, 4962 SourceLocation Loc) { 4963 ASTContext &C = CGM.getContext(); 4964 QualType FlagsTy; 4965 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4966 RecordDecl *KmpDependInfoRD = 4967 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4968 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4969 llvm::Value *NumDeps; 4970 LValue Base; 4971 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 4972 4973 Address Begin = Base.getAddress(CGF); 4974 // Cast from pointer to array type to pointer to single element. 4975 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 4976 // The basic structure here is a while-do loop. 
4977 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 4978 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 4979 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4980 CGF.EmitBlock(BodyBB); 4981 llvm::PHINode *ElementPHI = 4982 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 4983 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 4984 Begin = Address(ElementPHI, Begin.getAlignment()); 4985 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 4986 Base.getTBAAInfo()); 4987 // deps[i].flags = NewDepKind; 4988 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 4989 LValue FlagsLVal = CGF.EmitLValueForField( 4990 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4991 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4992 FlagsLVal); 4993 4994 // Shift the address forward by one element. 4995 Address ElementNext = 4996 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 4997 ElementPHI->addIncoming(ElementNext.getPointer(), 4998 CGF.Builder.GetInsertBlock()); 4999 llvm::Value *IsEmpty = 5000 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5001 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5002 // Done. 
5003 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5004 } 5005 5006 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5007 const OMPExecutableDirective &D, 5008 llvm::Function *TaskFunction, 5009 QualType SharedsTy, Address Shareds, 5010 const Expr *IfCond, 5011 const OMPTaskDataTy &Data) { 5012 if (!CGF.HaveInsertPoint()) 5013 return; 5014 5015 TaskResultTy Result = 5016 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5017 llvm::Value *NewTask = Result.NewTask; 5018 llvm::Function *TaskEntry = Result.TaskEntry; 5019 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5020 LValue TDBase = Result.TDBase; 5021 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5022 // Process list of dependences. 5023 Address DependenciesArray = Address::invalid(); 5024 llvm::Value *NumOfElements; 5025 std::tie(NumOfElements, DependenciesArray) = 5026 emitDependClause(CGF, Data.Dependences, Loc); 5027 5028 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5029 // libcall. 
5030 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5031 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5032 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5033 // list is not empty 5034 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5035 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5036 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5037 llvm::Value *DepTaskArgs[7]; 5038 if (!Data.Dependences.empty()) { 5039 DepTaskArgs[0] = UpLoc; 5040 DepTaskArgs[1] = ThreadID; 5041 DepTaskArgs[2] = NewTask; 5042 DepTaskArgs[3] = NumOfElements; 5043 DepTaskArgs[4] = DependenciesArray.getPointer(); 5044 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5045 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5046 } 5047 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5048 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5049 if (!Data.Tied) { 5050 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5051 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5052 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5053 } 5054 if (!Data.Dependences.empty()) { 5055 CGF.EmitRuntimeCall( 5056 OMPBuilder.getOrCreateRuntimeFunction( 5057 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5058 DepTaskArgs); 5059 } else { 5060 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5061 CGM.getModule(), OMPRTL___kmpc_omp_task), 5062 TaskArgs); 5063 } 5064 // Check if parent region is untied and build return for untied task; 5065 if (auto *Region = 5066 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5067 Region->emitUntiedSwitch(CGF); 5068 }; 5069 5070 llvm::Value *DepWaitTaskArgs[6]; 5071 if (!Data.Dependences.empty()) { 5072 DepWaitTaskArgs[0] = UpLoc; 5073 DepWaitTaskArgs[1] = ThreadID; 5074 DepWaitTaskArgs[2] = NumOfElements; 5075 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5076 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5077 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5078 } 5079 auto &M = CGM.getModule(); 5080 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5081 TaskEntry, &Data, &DepWaitTaskArgs, 5082 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5083 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5084 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5085 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5086 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5087 // is specified. 5088 if (!Data.Dependences.empty()) 5089 CGF.EmitRuntimeCall( 5090 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5091 DepWaitTaskArgs); 5092 // Call proxy_task_entry(gtid, new_task); 5093 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5094 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5095 Action.Enter(CGF); 5096 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5097 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5098 OutlinedFnArgs); 5099 }; 5100 5101 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5102 // kmp_task_t *new_task); 5103 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5104 // kmp_task_t *new_task); 5105 RegionCodeGenTy RCG(CodeGen); 5106 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5107 M, OMPRTL___kmpc_omp_task_begin_if0), 5108 TaskArgs, 5109 OMPBuilder.getOrCreateRuntimeFunction( 5110 M, OMPRTL___kmpc_omp_task_complete_if0), 5111 TaskArgs); 5112 RCG.setAction(Action); 5113 RCG(CGF); 5114 }; 5115 5116 if (IfCond) { 5117 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5118 } else { 5119 RegionCodeGenTy ThenRCG(ThenCodeGen); 5120 ThenRCG(CGF); 5121 } 5122 } 5123 5124 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5125 const OMPLoopDirective &D, 5126 llvm::Function *TaskFunction, 5127 
QualType SharedsTy, Address Shareds, 5128 const Expr *IfCond, 5129 const OMPTaskDataTy &Data) { 5130 if (!CGF.HaveInsertPoint()) 5131 return; 5132 TaskResultTy Result = 5133 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5134 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5135 // libcall. 5136 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5137 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5138 // sched, kmp_uint64 grainsize, void *task_dup); 5139 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5140 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5141 llvm::Value *IfVal; 5142 if (IfCond) { 5143 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5144 /*isSigned=*/true); 5145 } else { 5146 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5147 } 5148 5149 LValue LBLVal = CGF.EmitLValueForField( 5150 Result.TDBase, 5151 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5152 const auto *LBVar = 5153 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5154 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5155 LBLVal.getQuals(), 5156 /*IsInitializer=*/true); 5157 LValue UBLVal = CGF.EmitLValueForField( 5158 Result.TDBase, 5159 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5160 const auto *UBVar = 5161 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5162 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5163 UBLVal.getQuals(), 5164 /*IsInitializer=*/true); 5165 LValue StLVal = CGF.EmitLValueForField( 5166 Result.TDBase, 5167 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5168 const auto *StVar = 5169 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5170 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5171 StLVal.getQuals(), 5172 /*IsInitializer=*/true); 5173 // 
Store reductions address. 5174 LValue RedLVal = CGF.EmitLValueForField( 5175 Result.TDBase, 5176 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5177 if (Data.Reductions) { 5178 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5179 } else { 5180 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5181 CGF.getContext().VoidPtrTy); 5182 } 5183 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5184 llvm::Value *TaskArgs[] = { 5185 UpLoc, 5186 ThreadID, 5187 Result.NewTask, 5188 IfVal, 5189 LBLVal.getPointer(CGF), 5190 UBLVal.getPointer(CGF), 5191 CGF.EmitLoadOfScalar(StLVal, Loc), 5192 llvm::ConstantInt::getSigned( 5193 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5194 llvm::ConstantInt::getSigned( 5195 CGF.IntTy, Data.Schedule.getPointer() 5196 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5197 : NoSchedule), 5198 Data.Schedule.getPointer() 5199 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5200 /*isSigned=*/false) 5201 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5202 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5203 Result.TaskDupFn, CGF.VoidPtrTy) 5204 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5205 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5206 CGM.getModule(), OMPRTL___kmpc_taskloop), 5207 TaskArgs); 5208 } 5209 5210 /// Emit reduction operation for each element of array (required for 5211 /// array sections) LHS op = RHS. 5212 /// \param Type Type of array. 5213 /// \param LHSVar Variable on the left side of the reduction operation 5214 /// (references element of array in original variable). 5215 /// \param RHSVar Variable on the right side of the reduction operation 5216 /// (references element of array in original variable). 5217 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5218 /// RHSVar. 
5219 static void EmitOMPAggregateReduction( 5220 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5221 const VarDecl *RHSVar, 5222 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5223 const Expr *, const Expr *)> &RedOpGen, 5224 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5225 const Expr *UpExpr = nullptr) { 5226 // Perform element-by-element initialization. 5227 QualType ElementTy; 5228 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5229 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5230 5231 // Drill down to the base element type on both arrays. 5232 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5233 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5234 5235 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5236 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5237 // Cast from pointer to array type to pointer to single element. 5238 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5239 // The basic structure here is a while-do loop. 5240 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5241 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5242 llvm::Value *IsEmpty = 5243 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5244 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5245 5246 // Enter the loop body, making that address the current address. 
5247 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5248 CGF.EmitBlock(BodyBB); 5249 5250 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5251 5252 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5253 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5254 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5255 Address RHSElementCurrent = 5256 Address(RHSElementPHI, 5257 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5258 5259 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5260 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5261 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5262 Address LHSElementCurrent = 5263 Address(LHSElementPHI, 5264 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5265 5266 // Emit copy. 5267 CodeGenFunction::OMPPrivateScope Scope(CGF); 5268 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5269 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5270 Scope.Privatize(); 5271 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5272 Scope.ForceCleanup(); 5273 5274 // Shift the address forward by one element. 5275 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5276 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5277 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5278 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5279 // Check whether we've reached the end. 5280 llvm::Value *Done = 5281 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5282 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5283 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5284 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5285 5286 // Done. 5287 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5288 } 5289 5290 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5291 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5292 /// UDR combiner function. 5293 static void emitReductionCombiner(CodeGenFunction &CGF, 5294 const Expr *ReductionOp) { 5295 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5296 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5297 if (const auto *DRE = 5298 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5299 if (const auto *DRD = 5300 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5301 std::pair<llvm::Function *, llvm::Function *> Reduction = 5302 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5303 RValue Func = RValue::get(Reduction.first); 5304 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5305 CGF.EmitIgnoredExpr(ReductionOp); 5306 return; 5307 } 5308 CGF.EmitIgnoredExpr(ReductionOp); 5309 } 5310 5311 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5312 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5313 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5314 ArrayRef<const Expr *> ReductionOps) { 5315 ASTContext &C = CGM.getContext(); 5316 5317 // void reduction_func(void *LHSArg, void *RHSArg); 5318 FunctionArgList Args; 5319 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5320 ImplicitParamDecl::Other); 5321 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5322 ImplicitParamDecl::Other); 5323 Args.push_back(&LHSArg); 5324 Args.push_back(&RHSArg); 5325 const auto &CGFI = 5326 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5327 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5328 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5329 llvm::GlobalValue::InternalLinkage, Name, 5330 &CGM.getModule()); 5331 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5332 Fn->setDoesNotRecurse(); 
5333 CodeGenFunction CGF(CGM); 5334 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5335 5336 // Dst = (void*[n])(LHSArg); 5337 // Src = (void*[n])(RHSArg); 5338 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5339 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5340 ArgsType), CGF.getPointerAlign()); 5341 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5342 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5343 ArgsType), CGF.getPointerAlign()); 5344 5345 // ... 5346 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5347 // ... 5348 CodeGenFunction::OMPPrivateScope Scope(CGF); 5349 auto IPriv = Privates.begin(); 5350 unsigned Idx = 0; 5351 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5352 const auto *RHSVar = 5353 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5354 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5355 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5356 }); 5357 const auto *LHSVar = 5358 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5359 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5360 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5361 }); 5362 QualType PrivTy = (*IPriv)->getType(); 5363 if (PrivTy->isVariablyModifiedType()) { 5364 // Get array size and emit VLA type. 
5365 ++Idx; 5366 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5367 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5368 const VariableArrayType *VLA = 5369 CGF.getContext().getAsVariableArrayType(PrivTy); 5370 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5371 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5372 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5373 CGF.EmitVariablyModifiedType(PrivTy); 5374 } 5375 } 5376 Scope.Privatize(); 5377 IPriv = Privates.begin(); 5378 auto ILHS = LHSExprs.begin(); 5379 auto IRHS = RHSExprs.begin(); 5380 for (const Expr *E : ReductionOps) { 5381 if ((*IPriv)->getType()->isArrayType()) { 5382 // Emit reduction for array section. 5383 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5384 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5385 EmitOMPAggregateReduction( 5386 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5387 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5388 emitReductionCombiner(CGF, E); 5389 }); 5390 } else { 5391 // Emit reduction for array subscript or single variable. 5392 emitReductionCombiner(CGF, E); 5393 } 5394 ++IPriv; 5395 ++ILHS; 5396 ++IRHS; 5397 } 5398 Scope.ForceCleanup(); 5399 CGF.FinishFunction(); 5400 return Fn; 5401 } 5402 5403 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5404 const Expr *ReductionOp, 5405 const Expr *PrivateRef, 5406 const DeclRefExpr *LHS, 5407 const DeclRefExpr *RHS) { 5408 if (PrivateRef->getType()->isArrayType()) { 5409 // Emit reduction for array section. 
5410 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5411 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5412 EmitOMPAggregateReduction( 5413 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5414 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5415 emitReductionCombiner(CGF, ReductionOp); 5416 }); 5417 } else { 5418 // Emit reduction for array subscript or single variable. 5419 emitReductionCombiner(CGF, ReductionOp); 5420 } 5421 } 5422 5423 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5424 ArrayRef<const Expr *> Privates, 5425 ArrayRef<const Expr *> LHSExprs, 5426 ArrayRef<const Expr *> RHSExprs, 5427 ArrayRef<const Expr *> ReductionOps, 5428 ReductionOptionsTy Options) { 5429 if (!CGF.HaveInsertPoint()) 5430 return; 5431 5432 bool WithNowait = Options.WithNowait; 5433 bool SimpleReduction = Options.SimpleReduction; 5434 5435 // Next code should be emitted for reduction: 5436 // 5437 // static kmp_critical_name lock = { 0 }; 5438 // 5439 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5440 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5441 // ... 5442 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5443 // *(Type<n>-1*)rhs[<n>-1]); 5444 // } 5445 // 5446 // ... 5447 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5448 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5449 // RedList, reduce_func, &<lock>)) { 5450 // case 1: 5451 // ... 5452 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5453 // ... 5454 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5455 // break; 5456 // case 2: 5457 // ... 5458 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5459 // ... 5460 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5461 // break; 5462 // default:; 5463 // } 5464 // 5465 // if SimpleReduction is true, only the next code is generated: 5466 // ... 
5467 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5468 // ... 5469 5470 ASTContext &C = CGM.getContext(); 5471 5472 if (SimpleReduction) { 5473 CodeGenFunction::RunCleanupsScope Scope(CGF); 5474 auto IPriv = Privates.begin(); 5475 auto ILHS = LHSExprs.begin(); 5476 auto IRHS = RHSExprs.begin(); 5477 for (const Expr *E : ReductionOps) { 5478 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5479 cast<DeclRefExpr>(*IRHS)); 5480 ++IPriv; 5481 ++ILHS; 5482 ++IRHS; 5483 } 5484 return; 5485 } 5486 5487 // 1. Build a list of reduction variables. 5488 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5489 auto Size = RHSExprs.size(); 5490 for (const Expr *E : Privates) { 5491 if (E->getType()->isVariablyModifiedType()) 5492 // Reserve place for array size. 5493 ++Size; 5494 } 5495 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5496 QualType ReductionArrayTy = 5497 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5498 /*IndexTypeQuals=*/0); 5499 Address ReductionList = 5500 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5501 auto IPriv = Privates.begin(); 5502 unsigned Idx = 0; 5503 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5504 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5505 CGF.Builder.CreateStore( 5506 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5507 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5508 Elem); 5509 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5510 // Store array size. 5511 ++Idx; 5512 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5513 llvm::Value *Size = CGF.Builder.CreateIntCast( 5514 CGF.getVLASize( 5515 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5516 .NumElts, 5517 CGF.SizeTy, /*isSigned=*/false); 5518 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5519 Elem); 5520 } 5521 } 5522 5523 // 2. 
Emit reduce_func(). 5524 llvm::Function *ReductionFn = emitReductionFunction( 5525 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5526 LHSExprs, RHSExprs, ReductionOps); 5527 5528 // 3. Create static kmp_critical_name lock = { 0 }; 5529 std::string Name = getName({"reduction"}); 5530 llvm::Value *Lock = getCriticalRegionLock(Name); 5531 5532 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5533 // RedList, reduce_func, &<lock>); 5534 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5535 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5536 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5537 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5538 ReductionList.getPointer(), CGF.VoidPtrTy); 5539 llvm::Value *Args[] = { 5540 IdentTLoc, // ident_t *<loc> 5541 ThreadId, // i32 <gtid> 5542 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5543 ReductionArrayTySize, // size_type sizeof(RedList) 5544 RL, // void *RedList 5545 ReductionFn, // void (*) (void *, void *) <reduce_func> 5546 Lock // kmp_critical_name *&<lock> 5547 }; 5548 llvm::Value *Res = CGF.EmitRuntimeCall( 5549 OMPBuilder.getOrCreateRuntimeFunction( 5550 CGM.getModule(), 5551 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5552 Args); 5553 5554 // 5. Build switch(res) 5555 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5556 llvm::SwitchInst *SwInst = 5557 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5558 5559 // 6. Build case 1: 5560 // ... 5561 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5562 // ... 
5563 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5564 // break; 5565 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5566 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5567 CGF.EmitBlock(Case1BB); 5568 5569 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5570 llvm::Value *EndArgs[] = { 5571 IdentTLoc, // ident_t *<loc> 5572 ThreadId, // i32 <gtid> 5573 Lock // kmp_critical_name *&<lock> 5574 }; 5575 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5576 CodeGenFunction &CGF, PrePostActionTy &Action) { 5577 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5578 auto IPriv = Privates.begin(); 5579 auto ILHS = LHSExprs.begin(); 5580 auto IRHS = RHSExprs.begin(); 5581 for (const Expr *E : ReductionOps) { 5582 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5583 cast<DeclRefExpr>(*IRHS)); 5584 ++IPriv; 5585 ++ILHS; 5586 ++IRHS; 5587 } 5588 }; 5589 RegionCodeGenTy RCG(CodeGen); 5590 CommonActionTy Action( 5591 nullptr, llvm::None, 5592 OMPBuilder.getOrCreateRuntimeFunction( 5593 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5594 : OMPRTL___kmpc_end_reduce), 5595 EndArgs); 5596 RCG.setAction(Action); 5597 RCG(CGF); 5598 5599 CGF.EmitBranch(DefaultBB); 5600 5601 // 7. Build case 2: 5602 // ... 5603 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5604 // ... 
5605 // break; 5606 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5607 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5608 CGF.EmitBlock(Case2BB); 5609 5610 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5611 CodeGenFunction &CGF, PrePostActionTy &Action) { 5612 auto ILHS = LHSExprs.begin(); 5613 auto IRHS = RHSExprs.begin(); 5614 auto IPriv = Privates.begin(); 5615 for (const Expr *E : ReductionOps) { 5616 const Expr *XExpr = nullptr; 5617 const Expr *EExpr = nullptr; 5618 const Expr *UpExpr = nullptr; 5619 BinaryOperatorKind BO = BO_Comma; 5620 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5621 if (BO->getOpcode() == BO_Assign) { 5622 XExpr = BO->getLHS(); 5623 UpExpr = BO->getRHS(); 5624 } 5625 } 5626 // Try to emit update expression as a simple atomic. 5627 const Expr *RHSExpr = UpExpr; 5628 if (RHSExpr) { 5629 // Analyze RHS part of the whole expression. 5630 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5631 RHSExpr->IgnoreParenImpCasts())) { 5632 // If this is a conditional operator, analyze its condition for 5633 // min/max reduction operator. 
5634 RHSExpr = ACO->getCond(); 5635 } 5636 if (const auto *BORHS = 5637 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5638 EExpr = BORHS->getRHS(); 5639 BO = BORHS->getOpcode(); 5640 } 5641 } 5642 if (XExpr) { 5643 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5644 auto &&AtomicRedGen = [BO, VD, 5645 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5646 const Expr *EExpr, const Expr *UpExpr) { 5647 LValue X = CGF.EmitLValue(XExpr); 5648 RValue E; 5649 if (EExpr) 5650 E = CGF.EmitAnyExpr(EExpr); 5651 CGF.EmitOMPAtomicSimpleUpdateExpr( 5652 X, E, BO, /*IsXLHSInRHSPart=*/true, 5653 llvm::AtomicOrdering::Monotonic, Loc, 5654 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5655 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5656 PrivateScope.addPrivate( 5657 VD, [&CGF, VD, XRValue, Loc]() { 5658 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5659 CGF.emitOMPSimpleStore( 5660 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5661 VD->getType().getNonReferenceType(), Loc); 5662 return LHSTemp; 5663 }); 5664 (void)PrivateScope.Privatize(); 5665 return CGF.EmitAnyExpr(UpExpr); 5666 }); 5667 }; 5668 if ((*IPriv)->getType()->isArrayType()) { 5669 // Emit atomic reduction for array section. 5670 const auto *RHSVar = 5671 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5672 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5673 AtomicRedGen, XExpr, EExpr, UpExpr); 5674 } else { 5675 // Emit atomic reduction for array subscript or single variable. 5676 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5677 } 5678 } else { 5679 // Emit as a critical region. 
5680 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5681 const Expr *, const Expr *) { 5682 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5683 std::string Name = RT.getName({"atomic_reduction"}); 5684 RT.emitCriticalRegion( 5685 CGF, Name, 5686 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5687 Action.Enter(CGF); 5688 emitReductionCombiner(CGF, E); 5689 }, 5690 Loc); 5691 }; 5692 if ((*IPriv)->getType()->isArrayType()) { 5693 const auto *LHSVar = 5694 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5695 const auto *RHSVar = 5696 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5697 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5698 CritRedGen); 5699 } else { 5700 CritRedGen(CGF, nullptr, nullptr, nullptr); 5701 } 5702 } 5703 ++ILHS; 5704 ++IRHS; 5705 ++IPriv; 5706 } 5707 }; 5708 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5709 if (!WithNowait) { 5710 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5711 llvm::Value *EndArgs[] = { 5712 IdentTLoc, // ident_t *<loc> 5713 ThreadId, // i32 <gtid> 5714 Lock // kmp_critical_name *&<lock> 5715 }; 5716 CommonActionTy Action(nullptr, llvm::None, 5717 OMPBuilder.getOrCreateRuntimeFunction( 5718 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5719 EndArgs); 5720 AtomicRCG.setAction(Action); 5721 AtomicRCG(CGF); 5722 } else { 5723 AtomicRCG(CGF); 5724 } 5725 5726 CGF.EmitBranch(DefaultBB); 5727 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5728 } 5729 5730 /// Generates unique name for artificial threadprivate variables. 5731 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5732 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5733 const Expr *Ref) { 5734 SmallString<256> Buffer; 5735 llvm::raw_svector_ostream Out(Buffer); 5736 const clang::DeclRefExpr *DE; 5737 const VarDecl *D = ::getBaseDecl(Ref, DE); 5738 if (!D) 5739 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5740 D = D->getCanonicalDecl(); 5741 std::string Name = CGM.getOpenMPRuntime().getName( 5742 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5743 Out << Prefix << Name << "_" 5744 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5745 return std::string(Out.str()); 5746 } 5747 5748 /// Emits reduction initializer function: 5749 /// \code 5750 /// void @.red_init(void* %arg, void* %orig) { 5751 /// %0 = bitcast void* %arg to <type>* 5752 /// store <type> <init>, <type>* %0 5753 /// ret void 5754 /// } 5755 /// \endcode 5756 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5757 SourceLocation Loc, 5758 ReductionCodeGen &RCG, unsigned N) { 5759 ASTContext &C = CGM.getContext(); 5760 QualType VoidPtrTy = C.VoidPtrTy; 5761 VoidPtrTy.addRestrict(); 5762 FunctionArgList Args; 5763 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5764 ImplicitParamDecl::Other); 5765 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5766 ImplicitParamDecl::Other); 5767 Args.emplace_back(&Param); 5768 Args.emplace_back(&ParamOrig); 5769 const auto &FnInfo = 5770 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5771 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5772 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5773 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5774 Name, &CGM.getModule()); 5775 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5776 Fn->setDoesNotRecurse(); 5777 CodeGenFunction CGF(CGM); 5778 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5779 Address PrivateAddr = CGF.EmitLoadOfPointer( 5780 CGF.GetAddrOfLocalVar(&Param), 5781 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5782 llvm::Value *Size = nullptr; 5783 // If the size of the reduction item is non-constant, load it from global 5784 // threadprivate variable. 5785 if (RCG.getSizes(N).second) { 5786 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5787 CGF, CGM.getContext().getSizeType(), 5788 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5789 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5790 CGM.getContext().getSizeType(), Loc); 5791 } 5792 RCG.emitAggregateType(CGF, N, Size); 5793 LValue OrigLVal; 5794 // If initializer uses initializer from declare reduction construct, emit a 5795 // pointer to the address of the original reduction item (reuired by reduction 5796 // initializer) 5797 if (RCG.usesReductionInitializer(N)) { 5798 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5799 SharedAddr = CGF.EmitLoadOfPointer( 5800 SharedAddr, 5801 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5802 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5803 } else { 5804 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5805 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5806 CGM.getContext().VoidPtrTy); 5807 } 5808 // Emit the initializer: 5809 // %0 = bitcast void* %arg to <type>* 5810 // store <type> <init>, <type>* %0 5811 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5812 [](CodeGenFunction &) { return false; }); 5813 CGF.FinishFunction(); 5814 return Fn; 5815 } 5816 5817 /// Emits reduction combiner function: 5818 /// \code 5819 /// void @.red_comb(void* %arg0, void* %arg1) { 5820 /// %lhs = bitcast void* %arg0 to <type>* 5821 /// %rhs = bitcast void* %arg1 to <type>* 5822 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5823 /// store <type> %2, <type>* %lhs 5824 /// 
///   ret void
/// }
/// \endcode
/// \param N index of the reduction item inside \p RCG.
/// \param ReductionOp combiner expression emitted as the function body.
/// \param LHS, RHS DeclRefExprs whose variables are remapped to the first and
/// second function argument respectively.
/// \param PrivateRef private copy reference forwarded to
/// emitSingleReductionCombiner.
/// \return the generated function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
/// \param N index of the reduction item inside \p RCG.
/// \return the generated function, or nullptr when the item needs no cleanups
/// (callers must handle the null result).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Nothing to destroy: no finalizer is generated at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

// Fills a temporary array with one kmp_taskred_input_t record per reduction
// item in Data.ReductionVars and passes it to __kmpc_taskred_modifier_init
// (when Data.IsReductionWithTaskMod is set) or to __kmpc_taskred_init.
// Returns the value produced by that runtime call, or nullptr when there is no
// insert point or no reduction item.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

// Emits the runtime call that finishes a task reduction with a task modifier.
// IsWorksharingReduction is passed to the runtime as the 'is_ws' flag (1 or 0).
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int
  // is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

// Publishes the dynamic size of reduction item N in the artificial
// threadprivate variable that the generated init/comb/fini routines read
// (they use the same "reduction_size" name). Emits nothing for items whose
// size is constant.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

// Asks the runtime for the address of the reduction item that corresponds to
// SharedLVal. The returned Address reuses the alignment of SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

// Emits a taskwait, either through the OpenMPIRBuilder (when the
// OpenMPIRBuilder language option is on) or as a direct __kmpc_omp_taskwait
// call, then lets an enclosing OpenMP region emit its untied-task switch.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

// Emits the body produced by CodeGen inline in the current function, inside an
// InlinedOpenMPRegionRAII scope for a directive of kind InnerKind.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
// Values passed as the 'cncl_kind' argument of __kmpc_cancel and
// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

// Maps the directive kind named in a cancel / cancellation point construct to
// the matching RTCancelKind. Any kind other than parallel, for or sections is
// asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

// Emits a cancellation point: a __kmpc_cancellationpoint call followed by a
// branch out of the construct when the call returns non-zero. Nothing is
// emitted outside of an OpenMP region, or when the region has no cancel and
// CancelRegion is not taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits a cancel construct: a __kmpc_cancel call followed by a branch out of
// the construct when the call returns non-zero. When IfCond is given the whole
// sequence is guarded by it (the else branch is empty). Nothing is emitted
// outside of an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator, allocator traits) expression pairs. Held as an ArrayRef, so
  // the storage belongs to whoever constructs the action.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // Initializes every listed allocator on region entry.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // Destroys every listed allocator on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

// Outlines the target region of D. Every uses_allocators entry that carries
// allocator traits is collected first and attached to CodeGen as a pre/post
// action, then the actual work is delegated to
// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE: this local 'D' shadows the directive parameter 'D' inside the
      // loop body.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Entries without traits are skipped; only the others are initialized
      // and destroyed by the action.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  // UsesAllocatorAction only views Allocators; both live until the helper
  // below has returned.
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

// Emits a __kmpc_init_allocator call for one uses_allocators entry and stores
// the result into the allocator variable. Allocator must be (after stripping
// parens and implicit casts) a DeclRefExpr to a VarDecl, and AllocatorTraits
// must have a constant array type; both are enforced by cast<>.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the element count of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits lvalue as an lvalue of type 'void *' and load a
  // pointer from it; that loaded value is what gets passed to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable itself is emitted here, right before the store.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the 'void *' returned by the runtime to the allocator's own type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

// Emits a __kmpc_destroy_allocator call for one uses_allocators entry: the
// current value of the allocator variable is loaded, converted to 'void *'
// and handed to the runtime.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

// Generates the outlined function for the 'target' captured statement of D
// and, when IsOffloadEntry is set, creates the region ID and registers the
// entry with OffloadEntriesInfoManager. OutlinedFn is always set; OutlinedFnID
// is only set for offload entries.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // DeviceID and FileID are printed in hexadecimal, Line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a separate constant i8 global, initialized to zero.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls. In both cases the expression must additionally have no (possible)
/// side effects to count as trivial.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

// Looks through containers and compound statements for the single statement of
// Body that is not ignorable. Ignorable are trivial expressions, asm and null
// statements, flush/barrier/taskyield directives, and declaration statements
// whose declarations are all harmless (see the lambda below).
// Returns that statement, or nullptr when a compound statement holds more than
// one non-ignorable statement or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound
  // statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is skipped when every declaration in it is
        // either of a kind listed below or a variable that is constexpr, or
        // of trivial/reference type with no or a trivial initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
6494 static llvm::Value * 6495 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6496 const OMPExecutableDirective &D) { 6497 assert(!CGF.getLangOpts().OpenMPIsDevice && 6498 "Clauses associated with the teams directive expected to be emitted " 6499 "only for the host!"); 6500 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6501 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6502 "Expected target-based executable directive."); 6503 CGBuilderTy &Bld = CGF.Builder; 6504 switch (DirectiveKind) { 6505 case OMPD_target: { 6506 const auto *CS = D.getInnermostCapturedStmt(); 6507 const auto *Body = 6508 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6509 const Stmt *ChildStmt = 6510 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6511 if (const auto *NestedDir = 6512 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6513 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6514 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6515 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6516 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6517 const Expr *NumTeams = 6518 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6519 llvm::Value *NumTeamsVal = 6520 CGF.EmitScalarExpr(NumTeams, 6521 /*IgnoreResultAssign*/ true); 6522 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6523 /*isSigned=*/true); 6524 } 6525 return Bld.getInt32(0); 6526 } 6527 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6528 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6529 return Bld.getInt32(1); 6530 return Bld.getInt32(0); 6531 } 6532 return nullptr; 6533 } 6534 case OMPD_target_teams: 6535 case OMPD_target_teams_distribute: 6536 case OMPD_target_teams_distribute_simd: 6537 case OMPD_target_teams_distribute_parallel_for: 6538 case OMPD_target_teams_distribute_parallel_for_simd: { 6539 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6540 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6541 const Expr *NumTeams = 6542 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6543 llvm::Value *NumTeamsVal = 6544 CGF.EmitScalarExpr(NumTeams, 6545 /*IgnoreResultAssign*/ true); 6546 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6547 /*isSigned=*/true); 6548 } 6549 return Bld.getInt32(0); 6550 } 6551 case OMPD_target_parallel: 6552 case OMPD_target_parallel_for: 6553 case OMPD_target_parallel_for_simd: 6554 case OMPD_target_simd: 6555 return Bld.getInt32(1); 6556 case OMPD_parallel: 6557 case OMPD_for: 6558 case OMPD_parallel_for: 6559 case OMPD_parallel_master: 6560 case OMPD_parallel_sections: 6561 case OMPD_for_simd: 6562 case OMPD_parallel_for_simd: 6563 case OMPD_cancel: 6564 case OMPD_cancellation_point: 6565 case OMPD_ordered: 6566 case OMPD_threadprivate: 6567 case OMPD_allocate: 6568 case OMPD_task: 6569 case OMPD_simd: 6570 case OMPD_sections: 6571 case OMPD_section: 6572 case OMPD_single: 6573 case OMPD_master: 6574 case OMPD_critical: 6575 case OMPD_taskyield: 6576 case OMPD_barrier: 6577 case OMPD_taskwait: 6578 case OMPD_taskgroup: 6579 case OMPD_atomic: 6580 case OMPD_flush: 6581 case OMPD_depobj: 6582 case OMPD_scan: 6583 case OMPD_teams: 6584 case OMPD_target_data: 6585 case OMPD_target_exit_data: 6586 case OMPD_target_enter_data: 6587 case OMPD_distribute: 6588 case OMPD_distribute_simd: 6589 case OMPD_distribute_parallel_for: 6590 case OMPD_distribute_parallel_for_simd: 6591 case OMPD_teams_distribute: 6592 case OMPD_teams_distribute_simd: 6593 case OMPD_teams_distribute_parallel_for: 6594 case OMPD_teams_distribute_parallel_for_simd: 6595 case OMPD_target_update: 6596 case OMPD_declare_simd: 6597 case OMPD_declare_variant: 6598 case OMPD_begin_declare_variant: 6599 case OMPD_end_declare_variant: 6600 case OMPD_declare_target: 6601 case OMPD_end_declare_target: 6602 case OMPD_declare_reduction: 6603 case OMPD_declare_mapper: 6604 case OMPD_taskloop: 6605 case OMPD_taskloop_simd: 6606 case OMPD_master_taskloop: 
6607 case OMPD_master_taskloop_simd: 6608 case OMPD_parallel_master_taskloop: 6609 case OMPD_parallel_master_taskloop_simd: 6610 case OMPD_requires: 6611 case OMPD_unknown: 6612 break; 6613 default: 6614 break; 6615 } 6616 llvm_unreachable("Unexpected directive kind."); 6617 } 6618 6619 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6620 llvm::Value *DefaultThreadLimitVal) { 6621 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6622 CGF.getContext(), CS->getCapturedStmt()); 6623 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6624 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6625 llvm::Value *NumThreads = nullptr; 6626 llvm::Value *CondVal = nullptr; 6627 // Handle if clause. If if clause present, the number of threads is 6628 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6629 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6630 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6631 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6632 const OMPIfClause *IfClause = nullptr; 6633 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6634 if (C->getNameModifier() == OMPD_unknown || 6635 C->getNameModifier() == OMPD_parallel) { 6636 IfClause = C; 6637 break; 6638 } 6639 } 6640 if (IfClause) { 6641 const Expr *Cond = IfClause->getCondition(); 6642 bool Result; 6643 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6644 if (!Result) 6645 return CGF.Builder.getInt32(1); 6646 } else { 6647 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6648 if (const auto *PreInit = 6649 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6650 for (const auto *I : PreInit->decls()) { 6651 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6652 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6653 } else { 6654 CodeGenFunction::AutoVarEmission Emission = 6655 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6656 CGF.EmitAutoVarCleanups(Emission); 6657 } 6658 } 6659 } 6660 
CondVal = CGF.EvaluateExprAsBool(Cond); 6661 } 6662 } 6663 } 6664 // Check the value of num_threads clause iff if clause was not specified 6665 // or is not evaluated to false. 6666 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6667 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6668 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6669 const auto *NumThreadsClause = 6670 Dir->getSingleClause<OMPNumThreadsClause>(); 6671 CodeGenFunction::LexicalScope Scope( 6672 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6673 if (const auto *PreInit = 6674 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6675 for (const auto *I : PreInit->decls()) { 6676 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6677 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6678 } else { 6679 CodeGenFunction::AutoVarEmission Emission = 6680 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6681 CGF.EmitAutoVarCleanups(Emission); 6682 } 6683 } 6684 } 6685 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6686 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6687 /*isSigned=*/false); 6688 if (DefaultThreadLimitVal) 6689 NumThreads = CGF.Builder.CreateSelect( 6690 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6691 DefaultThreadLimitVal, NumThreads); 6692 } else { 6693 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6694 : CGF.Builder.getInt32(0); 6695 } 6696 // Process condition of the if clause. 6697 if (CondVal) { 6698 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6699 CGF.Builder.getInt32(1)); 6700 } 6701 return NumThreads; 6702 } 6703 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6704 return CGF.Builder.getInt32(1); 6705 return DefaultThreadLimitVal; 6706 } 6707 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6708 : CGF.Builder.getInt32(0); 6709 } 6710 6711 /// Emit the number of threads for a target directive. 
Inspect the 6712 /// thread_limit clause associated with a teams construct combined or closely 6713 /// nested with the target directive. 6714 /// 6715 /// Emit the num_threads clause for directives such as 'target parallel' that 6716 /// have no associated teams construct. 6717 /// 6718 /// Otherwise, return nullptr. 6719 static llvm::Value * 6720 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6721 const OMPExecutableDirective &D) { 6722 assert(!CGF.getLangOpts().OpenMPIsDevice && 6723 "Clauses associated with the teams directive expected to be emitted " 6724 "only for the host!"); 6725 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6726 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6727 "Expected target-based executable directive."); 6728 CGBuilderTy &Bld = CGF.Builder; 6729 llvm::Value *ThreadLimitVal = nullptr; 6730 llvm::Value *NumThreadsVal = nullptr; 6731 switch (DirectiveKind) { 6732 case OMPD_target: { 6733 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6734 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6735 return NumThreads; 6736 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6737 CGF.getContext(), CS->getCapturedStmt()); 6738 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6739 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6740 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6741 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6742 const auto *ThreadLimitClause = 6743 Dir->getSingleClause<OMPThreadLimitClause>(); 6744 CodeGenFunction::LexicalScope Scope( 6745 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6746 if (const auto *PreInit = 6747 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6748 for (const auto *I : PreInit->decls()) { 6749 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6750 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6751 } else { 6752 CodeGenFunction::AutoVarEmission Emission = 6753 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6754 CGF.EmitAutoVarCleanups(Emission); 6755 } 6756 } 6757 } 6758 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6759 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6760 ThreadLimitVal = 6761 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6762 } 6763 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6764 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6765 CS = Dir->getInnermostCapturedStmt(); 6766 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6767 CGF.getContext(), CS->getCapturedStmt()); 6768 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6769 } 6770 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6771 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6772 CS = Dir->getInnermostCapturedStmt(); 6773 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6774 return NumThreads; 6775 } 6776 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6777 return Bld.getInt32(1); 6778 } 6779 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6780 } 6781 case OMPD_target_teams: { 6782 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6783 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6784 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6785 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6786 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6787 ThreadLimitVal = 6788 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6789 } 6790 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6791 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6792 return NumThreads; 6793 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6794 CGF.getContext(), CS->getCapturedStmt()); 6795 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6796 if (Dir->getDirectiveKind() == OMPD_distribute) { 6797 CS = Dir->getInnermostCapturedStmt(); 6798 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6799 return NumThreads; 6800 } 6801 } 6802 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6803 } 6804 case OMPD_target_teams_distribute: 6805 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6806 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6807 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6808 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6809 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6810 ThreadLimitVal = 6811 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6812 } 6813 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6814 case OMPD_target_parallel: 6815 case OMPD_target_parallel_for: 6816 case OMPD_target_parallel_for_simd: 6817 case OMPD_target_teams_distribute_parallel_for: 6818 case OMPD_target_teams_distribute_parallel_for_simd: { 6819 llvm::Value *CondVal = nullptr; 6820 // Handle if clause. 
If if clause present, the number of threads is 6821 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6822 if (D.hasClausesOfKind<OMPIfClause>()) { 6823 const OMPIfClause *IfClause = nullptr; 6824 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6825 if (C->getNameModifier() == OMPD_unknown || 6826 C->getNameModifier() == OMPD_parallel) { 6827 IfClause = C; 6828 break; 6829 } 6830 } 6831 if (IfClause) { 6832 const Expr *Cond = IfClause->getCondition(); 6833 bool Result; 6834 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6835 if (!Result) 6836 return Bld.getInt32(1); 6837 } else { 6838 CodeGenFunction::RunCleanupsScope Scope(CGF); 6839 CondVal = CGF.EvaluateExprAsBool(Cond); 6840 } 6841 } 6842 } 6843 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6844 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6845 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6846 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6847 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6848 ThreadLimitVal = 6849 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6850 } 6851 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6852 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6853 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6854 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6855 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6856 NumThreadsVal = 6857 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6858 ThreadLimitVal = ThreadLimitVal 6859 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6860 ThreadLimitVal), 6861 NumThreadsVal, ThreadLimitVal) 6862 : NumThreadsVal; 6863 } 6864 if (!ThreadLimitVal) 6865 ThreadLimitVal = Bld.getInt32(0); 6866 if (CondVal) 6867 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6868 return ThreadLimitVal; 6869 } 6870 case OMPD_target_teams_distribute_simd: 6871 case OMPD_target_simd: 6872 return Bld.getInt32(1); 6873 case OMPD_parallel: 6874 case OMPD_for: 6875 case OMPD_parallel_for: 6876 case OMPD_parallel_master: 6877 case OMPD_parallel_sections: 6878 case OMPD_for_simd: 6879 case OMPD_parallel_for_simd: 6880 case OMPD_cancel: 6881 case OMPD_cancellation_point: 6882 case OMPD_ordered: 6883 case OMPD_threadprivate: 6884 case OMPD_allocate: 6885 case OMPD_task: 6886 case OMPD_simd: 6887 case OMPD_sections: 6888 case OMPD_section: 6889 case OMPD_single: 6890 case OMPD_master: 6891 case OMPD_critical: 6892 case OMPD_taskyield: 6893 case OMPD_barrier: 6894 case OMPD_taskwait: 6895 case OMPD_taskgroup: 6896 case OMPD_atomic: 6897 case OMPD_flush: 6898 case OMPD_depobj: 6899 case OMPD_scan: 6900 case OMPD_teams: 6901 case OMPD_target_data: 6902 case OMPD_target_exit_data: 6903 case OMPD_target_enter_data: 6904 case OMPD_distribute: 6905 case OMPD_distribute_simd: 6906 case OMPD_distribute_parallel_for: 6907 case OMPD_distribute_parallel_for_simd: 6908 case OMPD_teams_distribute: 6909 case OMPD_teams_distribute_simd: 6910 case OMPD_teams_distribute_parallel_for: 6911 case OMPD_teams_distribute_parallel_for_simd: 6912 case OMPD_target_update: 6913 case OMPD_declare_simd: 6914 case OMPD_declare_variant: 6915 case OMPD_begin_declare_variant: 6916 case OMPD_end_declare_variant: 6917 case OMPD_declare_target: 6918 case OMPD_end_declare_target: 6919 case OMPD_declare_reduction: 6920 case OMPD_declare_mapper: 6921 case OMPD_taskloop: 6922 case OMPD_taskloop_simd: 6923 case OMPD_master_taskloop: 6924 case OMPD_master_taskloop_simd: 6925 case 
OMPD_parallel_master_taskloop: 6926 case OMPD_parallel_master_taskloop_simd: 6927 case OMPD_requires: 6928 case OMPD_unknown: 6929 break; 6930 default: 6931 break; 6932 } 6933 llvm_unreachable("Unsupported directive kind."); 6934 } 6935 6936 namespace { 6937 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6938 6939 // Utility to handle information from clauses associated with a given 6940 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6941 // It provides a convenient interface to obtain the information and generate 6942 // code for that information. 6943 class MappableExprsHandler { 6944 public: 6945 /// Values for bit flags used to specify the mapping type for 6946 /// offloading. 6947 enum OpenMPOffloadMappingFlags : uint64_t { 6948 /// No flags 6949 OMP_MAP_NONE = 0x0, 6950 /// Allocate memory on the device and move data from host to device. 6951 OMP_MAP_TO = 0x01, 6952 /// Allocate memory on the device and move data from device to host. 6953 OMP_MAP_FROM = 0x02, 6954 /// Always perform the requested mapping action on the element, even 6955 /// if it was already mapped before. 6956 OMP_MAP_ALWAYS = 0x04, 6957 /// Delete the element from the device environment, ignoring the 6958 /// current reference count associated with the element. 6959 OMP_MAP_DELETE = 0x08, 6960 /// The element being mapped is a pointer-pointee pair; both the 6961 /// pointer and the pointee should be mapped. 6962 OMP_MAP_PTR_AND_OBJ = 0x10, 6963 /// This flags signals that the base address of an entry should be 6964 /// passed to the target kernel as an argument. 6965 OMP_MAP_TARGET_PARAM = 0x20, 6966 /// Signal that the runtime library has to return the device pointer 6967 /// in the current position for the data being mapped. Used when we have the 6968 /// use_device_ptr or use_device_addr clause. 6969 OMP_MAP_RETURN_PARAM = 0x40, 6970 /// This flag signals that the reference being passed is a pointer to 6971 /// private data. 
6972 OMP_MAP_PRIVATE = 0x80, 6973 /// Pass the element to the device by value. 6974 OMP_MAP_LITERAL = 0x100, 6975 /// Implicit map 6976 OMP_MAP_IMPLICIT = 0x200, 6977 /// Close is a hint to the runtime to allocate memory close to 6978 /// the target device. 6979 OMP_MAP_CLOSE = 0x400, 6980 /// 0x800 is reserved for compatibility with XLC. 6981 /// Produce a runtime error if the data is not already allocated. 6982 OMP_MAP_PRESENT = 0x1000, 6983 /// The 16 MSBs of the flags indicate whether the entry is member of some 6984 /// struct/class. 6985 OMP_MAP_MEMBER_OF = 0xffff000000000000, 6986 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 6987 }; 6988 6989 /// Get the offset of the OMP_MAP_MEMBER_OF field. 6990 static unsigned getFlagMemberOffset() { 6991 unsigned Offset = 0; 6992 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 6993 Remain = Remain >> 1) 6994 Offset++; 6995 return Offset; 6996 } 6997 6998 /// Class that associates information with a base pointer to be passed to the 6999 /// runtime library. 7000 class BasePointerInfo { 7001 /// The base pointer. 7002 llvm::Value *Ptr = nullptr; 7003 /// The base declaration that refers to this device pointer, or null if 7004 /// there is none. 
7005 const ValueDecl *DevPtrDecl = nullptr; 7006 7007 public: 7008 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7009 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7010 llvm::Value *operator*() const { return Ptr; } 7011 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7012 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7013 }; 7014 7015 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7016 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7017 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7018 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7019 7020 /// This structure contains combined information generated for mappable 7021 /// clauses, including base pointers, pointers, sizes, map types, and 7022 /// user-defined mappers. 7023 struct MapCombinedInfoTy { 7024 MapBaseValuesArrayTy BasePointers; 7025 MapValuesArrayTy Pointers; 7026 MapValuesArrayTy Sizes; 7027 MapFlagsArrayTy Types; 7028 MapMappersArrayTy Mappers; 7029 7030 /// Append arrays in \a CurInfo. 7031 void append(MapCombinedInfoTy &CurInfo) { 7032 BasePointers.append(CurInfo.BasePointers.begin(), 7033 CurInfo.BasePointers.end()); 7034 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7035 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7036 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7037 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7038 } 7039 }; 7040 7041 /// Map between a struct and the its lowest & highest elements which have been 7042 /// mapped. 
7043 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7044 /// HE(FieldIndex, Pointer)} 7045 struct StructRangeInfoTy { 7046 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7047 0, Address::invalid()}; 7048 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7049 0, Address::invalid()}; 7050 Address Base = Address::invalid(); 7051 }; 7052 7053 private: 7054 /// Kind that defines how a device pointer has to be returned. 7055 struct MapInfo { 7056 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7057 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7058 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7059 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7060 bool ReturnDevicePointer = false; 7061 bool IsImplicit = false; 7062 const ValueDecl *Mapper = nullptr; 7063 bool ForDeviceAddr = false; 7064 7065 MapInfo() = default; 7066 MapInfo( 7067 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7068 OpenMPMapClauseKind MapType, 7069 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7070 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7071 bool ReturnDevicePointer, bool IsImplicit, 7072 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false) 7073 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7074 MotionModifiers(MotionModifiers), 7075 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7076 Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {} 7077 }; 7078 7079 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7080 /// member and there is no map information about it, then emission of that 7081 /// entry is deferred until the whole struct has been processed. 
7082 struct DeferredDevicePtrEntryTy { 7083 const Expr *IE = nullptr; 7084 const ValueDecl *VD = nullptr; 7085 bool ForDeviceAddr = false; 7086 7087 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7088 bool ForDeviceAddr) 7089 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7090 }; 7091 7092 /// The target directive from where the mappable clauses were extracted. It 7093 /// is either a executable directive or a user-defined mapper directive. 7094 llvm::PointerUnion<const OMPExecutableDirective *, 7095 const OMPDeclareMapperDecl *> 7096 CurDir; 7097 7098 /// Function the directive is being generated for. 7099 CodeGenFunction &CGF; 7100 7101 /// Set of all first private variables in the current directive. 7102 /// bool data is set to true if the variable is implicitly marked as 7103 /// firstprivate, false otherwise. 7104 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7105 7106 /// Map between device pointer declarations and their expression components. 7107 /// The key value for declarations in 'this' is null. 7108 llvm::DenseMap< 7109 const ValueDecl *, 7110 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7111 DevPointersMap; 7112 7113 llvm::Value *getExprTypeSize(const Expr *E) const { 7114 QualType ExprTy = E->getType().getCanonicalType(); 7115 7116 // Calculate the size for array shaping expression. 7117 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7118 llvm::Value *Size = 7119 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7120 for (const Expr *SE : OAE->getDimensions()) { 7121 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7122 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7123 CGF.getContext().getSizeType(), 7124 SE->getExprLoc()); 7125 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7126 } 7127 return Size; 7128 } 7129 7130 // Reference types are ignored for mapping purposes. 
7131 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7132 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7133 7134 // Given that an array section is considered a built-in type, we need to 7135 // do the calculation based on the length of the section instead of relying 7136 // on CGF.getTypeSize(E->getType()). 7137 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7138 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7139 OAE->getBase()->IgnoreParenImpCasts()) 7140 .getCanonicalType(); 7141 7142 // If there is no length associated with the expression and lower bound is 7143 // not specified too, that means we are using the whole length of the 7144 // base. 7145 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7146 !OAE->getLowerBound()) 7147 return CGF.getTypeSize(BaseTy); 7148 7149 llvm::Value *ElemSize; 7150 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7151 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7152 } else { 7153 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7154 assert(ATy && "Expecting array type if not a pointer type."); 7155 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7156 } 7157 7158 // If we don't have a length at this point, that is because we have an 7159 // array section with a single element. 
7160 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7161 return ElemSize; 7162 7163 if (const Expr *LenExpr = OAE->getLength()) { 7164 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7165 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7166 CGF.getContext().getSizeType(), 7167 LenExpr->getExprLoc()); 7168 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7169 } 7170 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7171 OAE->getLowerBound() && "expected array_section[lb:]."); 7172 // Size = sizetype - lb * elemtype; 7173 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7174 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7175 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7176 CGF.getContext().getSizeType(), 7177 OAE->getLowerBound()->getExprLoc()); 7178 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7179 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7180 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7181 LengthVal = CGF.Builder.CreateSelect( 7182 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7183 return LengthVal; 7184 } 7185 return CGF.getTypeSize(ExprTy); 7186 } 7187 7188 /// Return the corresponding bits for a given map clause modifier. Add 7189 /// a flag marking the map as a pointer if requested. Add a flag marking the 7190 /// map as the first one of a series of maps that relate to the same map 7191 /// expression. 7192 OpenMPOffloadMappingFlags getMapTypeBits( 7193 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7194 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7195 bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7196 OpenMPOffloadMappingFlags Bits = 7197 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7198 switch (MapType) { 7199 case OMPC_MAP_alloc: 7200 case OMPC_MAP_release: 7201 // alloc and release is the default behavior in the runtime library, i.e. 7202 // if we don't pass any bits alloc/release that is what the runtime is 7203 // going to do. Therefore, we don't need to signal anything for these two 7204 // type modifiers. 7205 break; 7206 case OMPC_MAP_to: 7207 Bits |= OMP_MAP_TO; 7208 break; 7209 case OMPC_MAP_from: 7210 Bits |= OMP_MAP_FROM; 7211 break; 7212 case OMPC_MAP_tofrom: 7213 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7214 break; 7215 case OMPC_MAP_delete: 7216 Bits |= OMP_MAP_DELETE; 7217 break; 7218 case OMPC_MAP_unknown: 7219 llvm_unreachable("Unexpected map type!"); 7220 } 7221 if (AddPtrFlag) 7222 Bits |= OMP_MAP_PTR_AND_OBJ; 7223 if (AddIsTargetParamFlag) 7224 Bits |= OMP_MAP_TARGET_PARAM; 7225 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7226 != MapModifiers.end()) 7227 Bits |= OMP_MAP_ALWAYS; 7228 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7229 != MapModifiers.end()) 7230 Bits |= OMP_MAP_CLOSE; 7231 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7232 != MapModifiers.end()) 7233 Bits |= OMP_MAP_PRESENT; 7234 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7235 != MotionModifiers.end()) 7236 Bits |= OMP_MAP_PRESENT; 7237 return Bits; 7238 } 7239 7240 /// Return true if the provided expression is a final array section. A 7241 /// final array section, is one whose length can't be proved to be one. 7242 bool isFinalArraySectionExpression(const Expr *E) const { 7243 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7244 7245 // It is not an array section and therefore not a unity-size one. 7246 if (!OASE) 7247 return false; 7248 7249 // An array section with no colon always refer to a single element. 
7250 if (OASE->getColonLocFirst().isInvalid()) 7251 return false; 7252 7253 const Expr *Length = OASE->getLength(); 7254 7255 // If we don't have a length we have to check if the array has size 1 7256 // for this dimension. Also, we should always expect a length if the 7257 // base type is pointer. 7258 if (!Length) { 7259 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7260 OASE->getBase()->IgnoreParenImpCasts()) 7261 .getCanonicalType(); 7262 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7263 return ATy->getSize().getSExtValue() != 1; 7264 // If we don't have a constant dimension length, we have to consider 7265 // the current section as having any size, so it is not necessarily 7266 // unitary. If it happen to be unity size, that's user fault. 7267 return true; 7268 } 7269 7270 // Check if the length evaluates to 1. 7271 Expr::EvalResult Result; 7272 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7273 return true; // Can have more that size 1. 7274 7275 llvm::APSInt ConstLength = Result.Val.getInt(); 7276 return ConstLength.getSExtValue() != 1; 7277 } 7278 7279 /// Generate the base pointers, section pointers, sizes, map type bits, and 7280 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7281 /// map type, map or motion modifiers, and expression components. 7282 /// \a IsFirstComponent should be set to true if the provided set of 7283 /// components is the first associated with a capture. 
void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Set when the base is a non-local pointer variable whose load is deferred
    // (see the pointer handling in the final 'else' branch below).
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript or array section applied directly to 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping applied to 'this': the base is the value of the shaped
      // expression's base, with the alignment of that base's type.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      // Note: OASE and OAShE below intentionally shadow the outer variables of
      // the same name and refer to the current component.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer-typed component that is not itself a unary or binary
      // operator expression.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the section pointer (lower bound address) of this component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB addresses the last byte of the object (LB viewed as a void
          // pointer, advanced by size - 1).
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            // Use the first component that has an associated declaration; the
            // entry covers the bytes from LB up to that component's address.
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            // The next chunk starts one element past the overlapped component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Final chunk: from the current LB to one past HB.
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData,
                             IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  ///
  /// For a declaration recorded in FirstPrivateDecls the result is
  /// ALWAYS | TO for a constant type captured by reference, TO | PTR_AND_OBJ
  /// for a pointer type, and PRIVATE | TO otherwise. For any other
  /// declaration the result is TO | FROM.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF field value for the entry at zero-based index
  /// \p Position: the value (Position + 1) placed at the bit offset returned
  /// by getFlagMemberOffset().
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift left by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF field of \p Flags with \p MemberOfFlag.
  /// PTR_AND_OBJ entries whose MEMBER_OF field does not hold the full
  /// placeholder value are left untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \p Layout the fields of \p RD in the order of the elements of
  /// its LLVM struct type, recursing into base classes so that their fields
  /// appear in place.
  ///
  /// \param RD the record to flatten; must not be a union.
  /// \param Layout output list that receives the fields.
  /// \param AsBase when true, the base-subobject LLVM type of \p RD is used
  /// to size the layout instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot ends up holding either a
    // base class, a field, or nothing.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep whatever already occupies this slot.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the slots in order: recurse into bases, append fields directly,
    // and skip slots that were never filled.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for an executable directive. Records the declarations named
  /// in firstprivate clauses, the implicit firstprivates coming from
  /// uses_allocators clauses, and the component lists of is_device_ptr
  /// clauses.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the allocator traits variable when it is a plain declaration
        // reference; otherwise fall back to the allocator variable itself.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// One entry is appended to \p CombinedInfo; every entry of \p CurTypes
  /// has its MEMBER_OF field updated to refer to that new entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    // NOTE(review): CurTypes.begin() is dereferenced unconditionally, so this
    // presumably relies on CurTypes being non-empty - confirm with callers.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
7957 void generateAllInfo( 7958 MapCombinedInfoTy &CombinedInfo, 7959 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 7960 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 7961 // We have to process the component lists that relate with the same 7962 // declaration in a single chunk so that we can generate the map flags 7963 // correctly. Therefore, we organize all lists in a map. 7964 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7965 7966 // Helper function to fill the information map for the different supported 7967 // clauses. 7968 auto &&InfoGen = 7969 [&Info, &SkipVarSet]( 7970 const ValueDecl *D, 7971 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7972 OpenMPMapClauseKind MapType, 7973 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7974 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7975 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 7976 bool ForDeviceAddr = false) { 7977 const ValueDecl *VD = 7978 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7979 if (SkipVarSet.count(VD)) 7980 return; 7981 Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers, 7982 ReturnDevicePointer, IsImplicit, Mapper, 7983 ForDeviceAddr); 7984 }; 7985 7986 assert(CurDir.is<const OMPExecutableDirective *>() && 7987 "Expect a executable directive"); 7988 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7989 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7990 for (const auto L : C->component_lists()) { 7991 InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(), 7992 C->getMapTypeModifiers(), llvm::None, 7993 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L)); 7994 } 7995 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7996 for (const auto L : C->component_lists()) { 7997 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, 7998 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 7999 C->isImplicit(), std::get<2>(L)); 8000 } 8001 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8002 for (const auto L : C->component_lists()) { 8003 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, 8004 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8005 C->isImplicit(), std::get<2>(L)); 8006 } 8007 8008 // Look at the use_device_ptr clause information and mark the existing map 8009 // entries as such. If there is no map information for an entry in the 8010 // use_device_ptr list, we create one with map type 'alloc' and zero size 8011 // section. It is the user fault if that was not mapped before. If there is 8012 // no map information and the pointer is a struct member, then we defer the 8013 // emission of that entry until the whole struct has been processed. 
8014 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8015 DeferredInfo; 8016 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8017 8018 for (const auto *C : 8019 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8020 for (const auto L : C->component_lists()) { 8021 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8022 std::get<1>(L); 8023 assert(!Components.empty() && 8024 "Not expecting empty list of components!"); 8025 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8026 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8027 const Expr *IE = Components.back().getAssociatedExpression(); 8028 // If the first component is a member expression, we have to look into 8029 // 'this', which maps to null in the map of map information. Otherwise 8030 // look directly for the information. 8031 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8032 8033 // We potentially have map information for this declaration already. 8034 // Look for the first set of components that refer to it. 8035 if (It != Info.end()) { 8036 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8037 return MI.Components.back().getAssociatedDeclaration() == VD; 8038 }); 8039 // If we found a map entry, signal that the pointer has to be returned 8040 // and move on to the next declaration. 8041 // Exclude cases where the base pointer is mapped as array subscript, 8042 // array section or array shaping. The base address is passed as a 8043 // pointer to base in this case and cannot be used as a base for 8044 // use_device_ptr list item. 
8045 if (CI != It->second.end()) { 8046 auto PrevCI = std::next(CI->Components.rbegin()); 8047 const auto *VarD = dyn_cast<VarDecl>(VD); 8048 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8049 isa<MemberExpr>(IE) || 8050 !VD->getType().getNonReferenceType()->isPointerType() || 8051 PrevCI == CI->Components.rend() || 8052 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8053 VarD->hasLocalStorage()) { 8054 CI->ReturnDevicePointer = true; 8055 continue; 8056 } 8057 } 8058 } 8059 8060 // We didn't find any match in our map information - generate a zero 8061 // size array section - if the pointer is a struct member we defer this 8062 // action until the whole struct has been processed. 8063 if (isa<MemberExpr>(IE)) { 8064 // Insert the pointer into Info to be processed by 8065 // generateInfoForComponentList. Because it is a member pointer 8066 // without a pointee, no entry will be generated for it, therefore 8067 // we need to generate one after the whole struct has been processed. 8068 // Nonetheless, generateInfoForComponentList must be called to take 8069 // the pointer into account for the calculation of the range of the 8070 // partial struct. 
8071 InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None, 8072 /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr); 8073 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8074 } else { 8075 llvm::Value *Ptr = 8076 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8077 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8078 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8079 UseDevicePtrCombinedInfo.Sizes.push_back( 8080 llvm::Constant::getNullValue(CGF.Int64Ty)); 8081 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | 8082 OMP_MAP_TARGET_PARAM); 8083 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8084 } 8085 } 8086 } 8087 8088 // Look at the use_device_addr clause information and mark the existing map 8089 // entries as such. If there is no map information for an entry in the 8090 // use_device_addr list, we create one with map type 'alloc' and zero size 8091 // section. It is the user fault if that was not mapped before. If there is 8092 // no map information and the pointer is a struct member, then we defer the 8093 // emission of that entry until the whole struct has been processed. 8094 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8095 for (const auto *C : 8096 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { 8097 for (const auto L : C->component_lists()) { 8098 assert(!std::get<1>(L).empty() && 8099 "Not expecting empty list of components!"); 8100 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8101 if (!Processed.insert(VD).second) 8102 continue; 8103 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8104 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8105 // If the first component is a member expression, we have to look into 8106 // 'this', which maps to null in the map of map information. Otherwise 8107 // look directly for the information. 8108 auto It = Info.find(isa<MemberExpr>(IE) ? 
nullptr : VD); 8109 8110 // We potentially have map information for this declaration already. 8111 // Look for the first set of components that refer to it. 8112 if (It != Info.end()) { 8113 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8114 return MI.Components.back().getAssociatedDeclaration() == VD; 8115 }); 8116 // If we found a map entry, signal that the pointer has to be returned 8117 // and move on to the next declaration. 8118 if (CI != It->second.end()) { 8119 CI->ReturnDevicePointer = true; 8120 continue; 8121 } 8122 } 8123 8124 // We didn't find any match in our map information - generate a zero 8125 // size array section - if the pointer is a struct member we defer this 8126 // action until the whole struct has been processed. 8127 if (isa<MemberExpr>(IE)) { 8128 // Insert the pointer into Info to be processed by 8129 // generateInfoForComponentList. Because it is a member pointer 8130 // without a pointee, no entry will be generated for it, therefore 8131 // we need to generate one after the whole struct has been processed. 8132 // Nonetheless, generateInfoForComponentList must be called to take 8133 // the pointer into account for the calculation of the range of the 8134 // partial struct. 
8135 InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8136 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8137 nullptr, /*ForDeviceAddr=*/true); 8138 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8139 } else { 8140 llvm::Value *Ptr; 8141 if (IE->isGLValue()) 8142 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8143 else 8144 Ptr = CGF.EmitScalarExpr(IE); 8145 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8146 CombinedInfo.Pointers.push_back(Ptr); 8147 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8148 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8149 CombinedInfo.Mappers.push_back(nullptr); 8150 } 8151 } 8152 } 8153 8154 for (const auto &M : Info) { 8155 // We need to know when we generate information for the first component 8156 // associated with a capture, because the mapping flags depend on it. 8157 bool IsFirstComponentList = true; 8158 8159 // Temporary generated information. 8160 MapCombinedInfoTy CurInfo; 8161 StructRangeInfoTy PartialStruct; 8162 8163 for (const MapInfo &L : M.second) { 8164 assert(!L.Components.empty() && 8165 "Not expecting declaration with no component lists."); 8166 8167 // Remember the current base pointer index. 8168 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8169 generateInfoForComponentList(L.MapType, L.MapModifiers, 8170 L.MotionModifiers, L.Components, CurInfo, 8171 PartialStruct, IsFirstComponentList, 8172 L.IsImplicit, L.Mapper, L.ForDeviceAddr); 8173 8174 // If this entry relates with a device pointer, set the relevant 8175 // declaration and add the 'return pointer' flag. 
8176 if (L.ReturnDevicePointer) { 8177 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8178 "Unexpected number of mapped base pointers."); 8179 8180 const ValueDecl *RelevantVD = 8181 L.Components.back().getAssociatedDeclaration(); 8182 assert(RelevantVD && 8183 "No relevant declaration related with device pointer??"); 8184 8185 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8186 RelevantVD); 8187 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8188 } 8189 IsFirstComponentList = false; 8190 } 8191 8192 // Append any pending zero-length pointers which are struct members and 8193 // used with use_device_ptr or use_device_addr. 8194 auto CI = DeferredInfo.find(M.first); 8195 if (CI != DeferredInfo.end()) { 8196 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8197 llvm::Value *BasePtr; 8198 llvm::Value *Ptr; 8199 if (L.ForDeviceAddr) { 8200 if (L.IE->isGLValue()) 8201 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8202 else 8203 Ptr = this->CGF.EmitScalarExpr(L.IE); 8204 BasePtr = Ptr; 8205 // Entry is RETURN_PARAM. Also, set the placeholder value 8206 // MEMBER_OF=FFFF so that the entry is later updated with the 8207 // correct value of MEMBER_OF. 8208 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8209 } else { 8210 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8211 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8212 L.IE->getExprLoc()); 8213 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8214 // value MEMBER_OF=FFFF so that the entry is later updated with the 8215 // correct value of MEMBER_OF. 
8216 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8217 OMP_MAP_MEMBER_OF); 8218 } 8219 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8220 CurInfo.Pointers.push_back(Ptr); 8221 CurInfo.Sizes.push_back( 8222 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8223 CurInfo.Mappers.push_back(nullptr); 8224 } 8225 } 8226 8227 // If there is an entry in PartialStruct it means we have a struct with 8228 // individual members mapped. Emit an extra combined entry. 8229 if (PartialStruct.Base.isValid()) 8230 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 8231 8232 // We need to append the results of this capture to what we already have. 8233 CombinedInfo.append(CurInfo); 8234 } 8235 // Append data for use_device_ptr clauses. 8236 CombinedInfo.append(UseDevicePtrCombinedInfo); 8237 } 8238 8239 /// Generate all the base pointers, section pointers, sizes, map types, and 8240 /// mappers for the extracted map clauses of user-defined mapper (all included 8241 /// in \a CombinedInfo). 8242 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8243 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8244 "Expect a declare mapper directive"); 8245 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8246 // We have to process the component lists that relate with the same 8247 // declaration in a single chunk so that we can generate the map flags 8248 // correctly. Therefore, we organize all lists in a map. 8249 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8250 8251 // Fill the information map for map clauses. 8252 for (const auto *C : CurMapperDir->clauselists()) { 8253 const auto *MC = cast<OMPMapClause>(C); 8254 for (const auto L : MC->component_lists()) { 8255 const ValueDecl *VD = 8256 std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl()) 8257 : nullptr; 8258 // Get the corresponding user-defined mapper. 
8259 Info[VD].emplace_back(std::get<1>(L), MC->getMapType(), 8260 MC->getMapTypeModifiers(), llvm::None, 8261 /*ReturnDevicePointer=*/false, MC->isImplicit(), 8262 std::get<2>(L)); 8263 } 8264 } 8265 8266 for (const auto &M : Info) { 8267 // We need to know when we generate information for the first component 8268 // associated with a capture, because the mapping flags depend on it. 8269 bool IsFirstComponentList = true; 8270 8271 // Temporary generated information. 8272 MapCombinedInfoTy CurInfo; 8273 StructRangeInfoTy PartialStruct; 8274 8275 for (const MapInfo &L : M.second) { 8276 assert(!L.Components.empty() && 8277 "Not expecting declaration with no component lists."); 8278 generateInfoForComponentList(L.MapType, L.MapModifiers, 8279 L.MotionModifiers, L.Components, CurInfo, 8280 PartialStruct, IsFirstComponentList, 8281 L.IsImplicit, L.Mapper, L.ForDeviceAddr); 8282 IsFirstComponentList = false; 8283 } 8284 8285 // If there is an entry in PartialStruct it means we have a struct with 8286 // individual members mapped. Emit an extra combined entry. 8287 if (PartialStruct.Base.isValid()) 8288 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 8289 8290 // We need to append the results of this capture to what we already have. 8291 CombinedInfo.append(CurInfo); 8292 } 8293 } 8294 8295 /// Emit capture info for lambdas for variables captured by reference. 
8296 void generateInfoForLambdaCaptures( 8297 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8298 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8299 const auto *RD = VD->getType() 8300 .getCanonicalType() 8301 .getNonReferenceType() 8302 ->getAsCXXRecordDecl(); 8303 if (!RD || !RD->isLambda()) 8304 return; 8305 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8306 LValue VDLVal = CGF.MakeAddrLValue( 8307 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8308 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8309 FieldDecl *ThisCapture = nullptr; 8310 RD->getCaptureFields(Captures, ThisCapture); 8311 if (ThisCapture) { 8312 LValue ThisLVal = 8313 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8314 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8315 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8316 VDLVal.getPointer(CGF)); 8317 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8318 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8319 CombinedInfo.Sizes.push_back( 8320 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8321 CGF.Int64Ty, /*isSigned=*/true)); 8322 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8323 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8324 CombinedInfo.Mappers.push_back(nullptr); 8325 } 8326 for (const LambdaCapture &LC : RD->captures()) { 8327 if (!LC.capturesVariable()) 8328 continue; 8329 const VarDecl *VD = LC.getCapturedVar(); 8330 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8331 continue; 8332 auto It = Captures.find(VD); 8333 assert(It != Captures.end() && "Found lambda capture without field."); 8334 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8335 if (LC.getCaptureKind() == LCK_ByRef) { 8336 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8337 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8338 VDLVal.getPointer(CGF)); 8339 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8340 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8341 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8342 CGF.getTypeSize( 8343 VD->getType().getCanonicalType().getNonReferenceType()), 8344 CGF.Int64Ty, /*isSigned=*/true)); 8345 } else { 8346 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8347 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8348 VDLVal.getPointer(CGF)); 8349 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8350 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8351 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8352 } 8353 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8354 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8355 CombinedInfo.Mappers.push_back(nullptr); 8356 } 8357 } 8358 8359 /// Set correct indices for lambdas captures. 8360 void adjustMemberOfForLambdaCaptures( 8361 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8362 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8363 MapFlagsArrayTy &Types) const { 8364 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8365 // Set correct member_of idx for all implicit lambda captures. 
8366 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8367 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8368 continue; 8369 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8370 assert(BasePtr && "Unable to find base lambda address."); 8371 int TgtIdx = -1; 8372 for (unsigned J = I; J > 0; --J) { 8373 unsigned Idx = J - 1; 8374 if (Pointers[Idx] != BasePtr) 8375 continue; 8376 TgtIdx = Idx; 8377 break; 8378 } 8379 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8380 // All other current entries will be MEMBER_OF the combined entry 8381 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8382 // 0xFFFF in the MEMBER_OF field). 8383 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8384 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8385 } 8386 } 8387 8388 /// Generate the base pointers, section pointers, sizes, map types, and 8389 /// mappers associated to a given capture (all included in \a CombinedInfo). 8390 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8391 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8392 StructRangeInfoTy &PartialStruct) const { 8393 assert(!Cap->capturesVariableArrayType() && 8394 "Not expecting to generate map info for a variable array type!"); 8395 8396 // We need to know when we generating information for the first component 8397 const ValueDecl *VD = Cap->capturesThis() 8398 ? nullptr 8399 : Cap->getCapturedVar()->getCanonicalDecl(); 8400 8401 // If this declaration appears in a is_device_ptr clause we just have to 8402 // pass the pointer by value. If it is a reference to a declaration, we just 8403 // pass its value. 
8404 if (DevPointersMap.count(VD)) { 8405 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8406 CombinedInfo.Pointers.push_back(Arg); 8407 CombinedInfo.Sizes.push_back( 8408 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8409 CGF.Int64Ty, /*isSigned=*/true)); 8410 CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8411 CombinedInfo.Mappers.push_back(nullptr); 8412 return; 8413 } 8414 8415 using MapData = 8416 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8417 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8418 const ValueDecl *>; 8419 SmallVector<MapData, 4> DeclComponentLists; 8420 assert(CurDir.is<const OMPExecutableDirective *>() && 8421 "Expect a executable directive"); 8422 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8423 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8424 for (const auto L : C->decl_component_lists(VD)) { 8425 const ValueDecl *VDecl, *Mapper; 8426 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8427 std::tie(VDecl, Components, Mapper) = L; 8428 assert(VDecl == VD && "We got information for the wrong declaration??"); 8429 assert(!Components.empty() && 8430 "Not expecting declaration with no component lists."); 8431 DeclComponentLists.emplace_back(Components, C->getMapType(), 8432 C->getMapTypeModifiers(), 8433 C->isImplicit(), Mapper); 8434 } 8435 } 8436 8437 // Find overlapping elements (including the offset from the base element). 
8438 llvm::SmallDenseMap< 8439 const MapData *, 8440 llvm::SmallVector< 8441 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8442 4> 8443 OverlappedData; 8444 size_t Count = 0; 8445 for (const MapData &L : DeclComponentLists) { 8446 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8447 OpenMPMapClauseKind MapType; 8448 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8449 bool IsImplicit; 8450 const ValueDecl *Mapper; 8451 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8452 ++Count; 8453 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8454 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8455 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1; 8456 auto CI = Components.rbegin(); 8457 auto CE = Components.rend(); 8458 auto SI = Components1.rbegin(); 8459 auto SE = Components1.rend(); 8460 for (; CI != CE && SI != SE; ++CI, ++SI) { 8461 if (CI->getAssociatedExpression()->getStmtClass() != 8462 SI->getAssociatedExpression()->getStmtClass()) 8463 break; 8464 // Are we dealing with different variables/fields? 8465 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8466 break; 8467 } 8468 // Found overlapping if, at least for one component, reached the head of 8469 // the components list. 8470 if (CI == CE || SI == SE) { 8471 assert((CI != CE || SI != SE) && 8472 "Unexpected full match of the mapping components."); 8473 const MapData &BaseData = CI == CE ? L : L1; 8474 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8475 SI == SE ? Components : Components1; 8476 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8477 OverlappedElements.getSecond().push_back(SubData); 8478 } 8479 } 8480 } 8481 // Sort the overlapped elements for each item. 
8482 llvm::SmallVector<const FieldDecl *, 4> Layout; 8483 if (!OverlappedData.empty()) { 8484 if (const auto *CRD = 8485 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8486 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8487 else { 8488 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8489 Layout.append(RD->field_begin(), RD->field_end()); 8490 } 8491 } 8492 for (auto &Pair : OverlappedData) { 8493 llvm::sort( 8494 Pair.getSecond(), 8495 [&Layout]( 8496 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8497 OMPClauseMappableExprCommon::MappableExprComponentListRef 8498 Second) { 8499 auto CI = First.rbegin(); 8500 auto CE = First.rend(); 8501 auto SI = Second.rbegin(); 8502 auto SE = Second.rend(); 8503 for (; CI != CE && SI != SE; ++CI, ++SI) { 8504 if (CI->getAssociatedExpression()->getStmtClass() != 8505 SI->getAssociatedExpression()->getStmtClass()) 8506 break; 8507 // Are we dealing with different variables/fields? 8508 if (CI->getAssociatedDeclaration() != 8509 SI->getAssociatedDeclaration()) 8510 break; 8511 } 8512 8513 // Lists contain the same elements. 8514 if (CI == CE && SI == SE) 8515 return false; 8516 8517 // List with less elements is less than list with more elements. 8518 if (CI == CE || SI == SE) 8519 return CI == CE; 8520 8521 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8522 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8523 if (FD1->getParent() == FD2->getParent()) 8524 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8525 const auto It = 8526 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8527 return FD == FD1 || FD == FD2; 8528 }); 8529 return *It == FD1; 8530 }); 8531 } 8532 8533 // Associated with a capture, because the mapping flags depend on it. 8534 // Go through all of the elements with the overlapped elements. 
8535 for (const auto &Pair : OverlappedData) { 8536 const MapData &L = *Pair.getFirst(); 8537 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8538 OpenMPMapClauseKind MapType; 8539 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8540 bool IsImplicit; 8541 const ValueDecl *Mapper; 8542 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8543 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8544 OverlappedComponents = Pair.getSecond(); 8545 bool IsFirstComponentList = true; 8546 generateInfoForComponentList( 8547 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 8548 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 8549 /*ForDeviceAddr=*/false, OverlappedComponents); 8550 } 8551 // Go through other elements without overlapped elements. 8552 bool IsFirstComponentList = OverlappedData.empty(); 8553 for (const MapData &L : DeclComponentLists) { 8554 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8555 OpenMPMapClauseKind MapType; 8556 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8557 bool IsImplicit; 8558 const ValueDecl *Mapper; 8559 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8560 auto It = OverlappedData.find(&L); 8561 if (It == OverlappedData.end()) 8562 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 8563 Components, CombinedInfo, PartialStruct, 8564 IsFirstComponentList, IsImplicit, Mapper); 8565 IsFirstComponentList = false; 8566 } 8567 } 8568 8569 /// Generate the default map information for a given capture \a CI, 8570 /// record field declaration \a RI and captured value \a CV. 8571 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8572 const FieldDecl &RI, llvm::Value *CV, 8573 MapCombinedInfoTy &CombinedInfo) const { 8574 bool IsImplicit = true; 8575 // Do the default mapping. 
8576 if (CI.capturesThis()) { 8577 CombinedInfo.BasePointers.push_back(CV); 8578 CombinedInfo.Pointers.push_back(CV); 8579 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8580 CombinedInfo.Sizes.push_back( 8581 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8582 CGF.Int64Ty, /*isSigned=*/true)); 8583 // Default map type. 8584 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8585 } else if (CI.capturesVariableByCopy()) { 8586 CombinedInfo.BasePointers.push_back(CV); 8587 CombinedInfo.Pointers.push_back(CV); 8588 if (!RI.getType()->isAnyPointerType()) { 8589 // We have to signal to the runtime captures passed by value that are 8590 // not pointers. 8591 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 8592 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8593 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8594 } else { 8595 // Pointers are implicitly mapped with a zero size and no flags 8596 // (other than first map that is added for all implicit maps). 8597 CombinedInfo.Types.push_back(OMP_MAP_NONE); 8598 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8599 } 8600 const VarDecl *VD = CI.getCapturedVar(); 8601 auto I = FirstPrivateDecls.find(VD); 8602 if (I != FirstPrivateDecls.end()) 8603 IsImplicit = I->getSecond(); 8604 } else { 8605 assert(CI.capturesVariable() && "Expected captured reference."); 8606 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8607 QualType ElementType = PtrTy->getPointeeType(); 8608 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8609 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8610 // The default map type for a scalar/complex type is 'to' because by 8611 // default the value doesn't have to be retrieved. For an aggregate 8612 // type, the default is 'tofrom'. 
8613 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8614 const VarDecl *VD = CI.getCapturedVar(); 8615 auto I = FirstPrivateDecls.find(VD); 8616 if (I != FirstPrivateDecls.end() && 8617 VD->getType().isConstant(CGF.getContext())) { 8618 llvm::Constant *Addr = 8619 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8620 // Copy the value of the original variable to the new global copy. 8621 CGF.Builder.CreateMemCpy( 8622 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8623 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8624 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 8625 // Use new global variable as the base pointers. 8626 CombinedInfo.BasePointers.push_back(Addr); 8627 CombinedInfo.Pointers.push_back(Addr); 8628 } else { 8629 CombinedInfo.BasePointers.push_back(CV); 8630 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8631 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8632 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8633 AlignmentSource::Decl)); 8634 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 8635 } else { 8636 CombinedInfo.Pointers.push_back(CV); 8637 } 8638 } 8639 if (I != FirstPrivateDecls.end()) 8640 IsImplicit = I->getSecond(); 8641 } 8642 // Every default map produces a single argument which is a target parameter. 8643 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 8644 8645 // Add flag stating this is an implicit map. 8646 if (IsImplicit) 8647 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 8648 8649 // No user-defined mapper for default mapping. 8650 CombinedInfo.Mappers.push_back(nullptr); 8651 } 8652 }; 8653 } // anonymous namespace 8654 8655 /// Emit the arrays used to pass the captures and map information to the 8656 /// offloading runtime library. If there is no map or capture information, 8657 /// return nullptr by reference. 
8658 static void 8659 emitOffloadingArrays(CodeGenFunction &CGF, 8660 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8661 CGOpenMPRuntime::TargetDataInfo &Info) { 8662 CodeGenModule &CGM = CGF.CGM; 8663 ASTContext &Ctx = CGF.getContext(); 8664 8665 // Reset the array information. 8666 Info.clearArrayInfo(); 8667 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 8668 8669 if (Info.NumberOfPtrs) { 8670 // Detect if we have any capture size requiring runtime evaluation of the 8671 // size so that a constant array could be eventually used. 8672 bool hasRuntimeEvaluationCaptureSize = false; 8673 for (llvm::Value *S : CombinedInfo.Sizes) 8674 if (!isa<llvm::Constant>(S)) { 8675 hasRuntimeEvaluationCaptureSize = true; 8676 break; 8677 } 8678 8679 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8680 QualType PointerArrayType = Ctx.getConstantArrayType( 8681 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8682 /*IndexTypeQuals=*/0); 8683 8684 Info.BasePointersArray = 8685 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8686 Info.PointersArray = 8687 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8688 Address MappersArray = 8689 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 8690 Info.MappersArray = MappersArray.getPointer(); 8691 8692 // If we don't have any VLA types or other types that require runtime 8693 // evaluation, we can use a constant array for the map sizes, otherwise we 8694 // need to fill up the arrays as we do for the pointers. 
8695 QualType Int64Ty = 8696 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8697 if (hasRuntimeEvaluationCaptureSize) { 8698 QualType SizeArrayType = Ctx.getConstantArrayType( 8699 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8700 /*IndexTypeQuals=*/0); 8701 Info.SizesArray = 8702 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8703 } else { 8704 // We expect all the sizes to be constant, so we collect them to create 8705 // a constant array. 8706 SmallVector<llvm::Constant *, 16> ConstSizes; 8707 for (llvm::Value *S : CombinedInfo.Sizes) 8708 ConstSizes.push_back(cast<llvm::Constant>(S)); 8709 8710 auto *SizesArrayInit = llvm::ConstantArray::get( 8711 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8712 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8713 auto *SizesArrayGbl = new llvm::GlobalVariable( 8714 CGM.getModule(), SizesArrayInit->getType(), 8715 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8716 SizesArrayInit, Name); 8717 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8718 Info.SizesArray = SizesArrayGbl; 8719 } 8720 8721 // The map types are always constant so we don't need to generate code to 8722 // fill arrays. Instead, we create an array constant. 
8723 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 8724 llvm::copy(CombinedInfo.Types, Mapping.begin()); 8725 llvm::Constant *MapTypesArrayInit = 8726 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8727 std::string MaptypesName = 8728 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8729 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8730 CGM.getModule(), MapTypesArrayInit->getType(), 8731 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8732 MapTypesArrayInit, MaptypesName); 8733 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8734 Info.MapTypesArray = MapTypesArrayGbl; 8735 8736 // If there's a present map type modifier, it must not be applied to the end 8737 // of a region, so generate a separate map type array in that case. 8738 if (Info.separateBeginEndCalls()) { 8739 bool EndMapTypesDiffer = false; 8740 for (uint64_t &Type : Mapping) { 8741 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 8742 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 8743 EndMapTypesDiffer = true; 8744 } 8745 } 8746 if (EndMapTypesDiffer) { 8747 MapTypesArrayInit = 8748 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8749 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8750 MapTypesArrayGbl = new llvm::GlobalVariable( 8751 CGM.getModule(), MapTypesArrayInit->getType(), 8752 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8753 MapTypesArrayInit, MaptypesName); 8754 MapTypesArrayGbl->setUnnamedAddr( 8755 llvm::GlobalValue::UnnamedAddr::Global); 8756 Info.MapTypesArrayEnd = MapTypesArrayGbl; 8757 } 8758 } 8759 8760 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8761 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 8762 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8763 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8764 Info.BasePointersArray, 0, I); 8765 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8766 BP, 
BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8767 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8768 CGF.Builder.CreateStore(BPVal, BPAddr); 8769 8770 if (Info.requiresDevicePointerInfo()) 8771 if (const ValueDecl *DevVD = 8772 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 8773 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8774 8775 llvm::Value *PVal = CombinedInfo.Pointers[I]; 8776 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8777 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8778 Info.PointersArray, 0, I); 8779 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8780 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8781 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8782 CGF.Builder.CreateStore(PVal, PAddr); 8783 8784 if (hasRuntimeEvaluationCaptureSize) { 8785 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8786 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8787 Info.SizesArray, 8788 /*Idx0=*/0, 8789 /*Idx1=*/I); 8790 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8791 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 8792 CGM.Int64Ty, 8793 /*isSigned=*/true), 8794 SAddr); 8795 } 8796 8797 // Fill up the mapper array. 8798 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 8799 if (CombinedInfo.Mappers[I]) { 8800 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 8801 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 8802 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 8803 Info.HasMapper = true; 8804 } 8805 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 8806 CGF.Builder.CreateStore(MFunc, MAddr); 8807 } 8808 } 8809 } 8810 8811 /// Emit the arguments to be passed to the runtime library based on the 8812 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 8813 /// ForEndCall, emit map types to be passed for the end of the region instead of 8814 /// the beginning. 
8815 static void emitOffloadingArraysArgument( 8816 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8817 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8818 llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, 8819 CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) { 8820 assert((!ForEndCall || Info.separateBeginEndCalls()) && 8821 "expected region end call to runtime only when end call is separate"); 8822 CodeGenModule &CGM = CGF.CGM; 8823 if (Info.NumberOfPtrs) { 8824 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8825 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8826 Info.BasePointersArray, 8827 /*Idx0=*/0, /*Idx1=*/0); 8828 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8829 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8830 Info.PointersArray, 8831 /*Idx0=*/0, 8832 /*Idx1=*/0); 8833 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8834 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8835 /*Idx0=*/0, /*Idx1=*/0); 8836 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8837 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8838 ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 8839 : Info.MapTypesArray, 8840 /*Idx0=*/0, 8841 /*Idx1=*/0); 8842 MappersArrayArg = 8843 Info.HasMapper 8844 ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy) 8845 : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8846 } else { 8847 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8848 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8849 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8850 MapTypesArrayArg = 8851 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8852 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8853 } 8854 } 8855 8856 /// Check for inner distribute directive. 
8857 static const OMPExecutableDirective * 8858 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8859 const auto *CS = D.getInnermostCapturedStmt(); 8860 const auto *Body = 8861 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8862 const Stmt *ChildStmt = 8863 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8864 8865 if (const auto *NestedDir = 8866 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8867 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8868 switch (D.getDirectiveKind()) { 8869 case OMPD_target: 8870 if (isOpenMPDistributeDirective(DKind)) 8871 return NestedDir; 8872 if (DKind == OMPD_teams) { 8873 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8874 /*IgnoreCaptured=*/true); 8875 if (!Body) 8876 return nullptr; 8877 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8878 if (const auto *NND = 8879 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8880 DKind = NND->getDirectiveKind(); 8881 if (isOpenMPDistributeDirective(DKind)) 8882 return NND; 8883 } 8884 } 8885 return nullptr; 8886 case OMPD_target_teams: 8887 if (isOpenMPDistributeDirective(DKind)) 8888 return NestedDir; 8889 return nullptr; 8890 case OMPD_target_parallel: 8891 case OMPD_target_simd: 8892 case OMPD_target_parallel_for: 8893 case OMPD_target_parallel_for_simd: 8894 return nullptr; 8895 case OMPD_target_teams_distribute: 8896 case OMPD_target_teams_distribute_simd: 8897 case OMPD_target_teams_distribute_parallel_for: 8898 case OMPD_target_teams_distribute_parallel_for_simd: 8899 case OMPD_parallel: 8900 case OMPD_for: 8901 case OMPD_parallel_for: 8902 case OMPD_parallel_master: 8903 case OMPD_parallel_sections: 8904 case OMPD_for_simd: 8905 case OMPD_parallel_for_simd: 8906 case OMPD_cancel: 8907 case OMPD_cancellation_point: 8908 case OMPD_ordered: 8909 case OMPD_threadprivate: 8910 case OMPD_allocate: 8911 case OMPD_task: 8912 case OMPD_simd: 8913 case OMPD_sections: 8914 
case OMPD_section: 8915 case OMPD_single: 8916 case OMPD_master: 8917 case OMPD_critical: 8918 case OMPD_taskyield: 8919 case OMPD_barrier: 8920 case OMPD_taskwait: 8921 case OMPD_taskgroup: 8922 case OMPD_atomic: 8923 case OMPD_flush: 8924 case OMPD_depobj: 8925 case OMPD_scan: 8926 case OMPD_teams: 8927 case OMPD_target_data: 8928 case OMPD_target_exit_data: 8929 case OMPD_target_enter_data: 8930 case OMPD_distribute: 8931 case OMPD_distribute_simd: 8932 case OMPD_distribute_parallel_for: 8933 case OMPD_distribute_parallel_for_simd: 8934 case OMPD_teams_distribute: 8935 case OMPD_teams_distribute_simd: 8936 case OMPD_teams_distribute_parallel_for: 8937 case OMPD_teams_distribute_parallel_for_simd: 8938 case OMPD_target_update: 8939 case OMPD_declare_simd: 8940 case OMPD_declare_variant: 8941 case OMPD_begin_declare_variant: 8942 case OMPD_end_declare_variant: 8943 case OMPD_declare_target: 8944 case OMPD_end_declare_target: 8945 case OMPD_declare_reduction: 8946 case OMPD_declare_mapper: 8947 case OMPD_taskloop: 8948 case OMPD_taskloop_simd: 8949 case OMPD_master_taskloop: 8950 case OMPD_master_taskloop_simd: 8951 case OMPD_parallel_master_taskloop: 8952 case OMPD_parallel_master_taskloop_simd: 8953 case OMPD_requires: 8954 case OMPD_unknown: 8955 default: 8956 llvm_unreachable("Unexpected directive."); 8957 } 8958 } 8959 8960 return nullptr; 8961 } 8962 8963 /// Emit the user-defined mapper function. The code generation follows the 8964 /// pattern in the example below. 8965 /// \code 8966 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8967 /// void *base, void *begin, 8968 /// int64_t size, int64_t type) { 8969 /// // Allocate space for an array section first. 8970 /// if (size > 1 && !maptype.IsDelete) 8971 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8972 /// size*sizeof(Ty), clearToFrom(type)); 8973 /// // Map members. 
8974 /// for (unsigned i = 0; i < size; i++) { 8975 /// // For each component specified by this mapper: 8976 /// for (auto c : all_components) { 8977 /// if (c.hasMapper()) 8978 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8979 /// c.arg_type); 8980 /// else 8981 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8982 /// c.arg_begin, c.arg_size, c.arg_type); 8983 /// } 8984 /// } 8985 /// // Delete the array section. 8986 /// if (size > 1 && maptype.IsDelete) 8987 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8988 /// size*sizeof(Ty), clearToFrom(type)); 8989 /// } 8990 /// \endcode 8991 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8992 CodeGenFunction *CGF) { 8993 if (UDMMap.count(D) > 0) 8994 return; 8995 ASTContext &C = CGM.getContext(); 8996 QualType Ty = D->getType(); 8997 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8998 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8999 auto *MapperVarDecl = 9000 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9001 SourceLocation Loc = D->getLocation(); 9002 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9003 9004 // Prepare mapper function arguments and attributes. 
9005 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9006 C.VoidPtrTy, ImplicitParamDecl::Other); 9007 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9008 ImplicitParamDecl::Other); 9009 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9010 C.VoidPtrTy, ImplicitParamDecl::Other); 9011 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9012 ImplicitParamDecl::Other); 9013 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9014 ImplicitParamDecl::Other); 9015 FunctionArgList Args; 9016 Args.push_back(&HandleArg); 9017 Args.push_back(&BaseArg); 9018 Args.push_back(&BeginArg); 9019 Args.push_back(&SizeArg); 9020 Args.push_back(&TypeArg); 9021 const CGFunctionInfo &FnInfo = 9022 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9023 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9024 SmallString<64> TyStr; 9025 llvm::raw_svector_ostream Out(TyStr); 9026 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9027 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9028 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9029 Name, &CGM.getModule()); 9030 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9031 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9032 // Start the mapper function code generation. 9033 CodeGenFunction MapperCGF(CGM); 9034 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9035 // Compute the starting and end addreses of array elements. 9036 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9037 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9038 C.getPointerType(Int64Ty), Loc); 9039 // Convert the size in bytes into the number of array elements. 
9040 Size = MapperCGF.Builder.CreateExactUDiv( 9041 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9042 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9043 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9044 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9045 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9046 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9047 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9048 C.getPointerType(Int64Ty), Loc); 9049 // Prepare common arguments for array initiation and deletion. 9050 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9051 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9052 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9053 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9054 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9055 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9056 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9057 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9058 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9059 9060 // Emit array initiation if this is an array section and \p MapType indicates 9061 // that memory allocation is required. 9062 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9063 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9064 ElementSize, HeadBB, /*IsInit=*/true); 9065 9066 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9067 9068 // Emit the loop header block. 9069 MapperCGF.EmitBlock(HeadBB); 9070 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9071 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9072 // Evaluate whether the initial condition is satisfied. 
9073 llvm::Value *IsEmpty = 9074 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9075 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9076 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9077 9078 // Emit the loop body block. 9079 MapperCGF.EmitBlock(BodyBB); 9080 llvm::BasicBlock *LastBB = BodyBB; 9081 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9082 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9083 PtrPHI->addIncoming(PtrBegin, EntryBB); 9084 Address PtrCurrent = 9085 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9086 .getAlignment() 9087 .alignmentOfArrayElement(ElementSize)); 9088 // Privatize the declared variable of mapper to be the current array element. 9089 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9090 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9091 return MapperCGF 9092 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9093 .getAddress(MapperCGF); 9094 }); 9095 (void)Scope.Privatize(); 9096 9097 // Get map clause information. Fill up the arrays with all mapped variables. 9098 MappableExprsHandler::MapCombinedInfoTy Info; 9099 MappableExprsHandler MEHandler(*D, MapperCGF); 9100 MEHandler.generateAllInfoForMapper(Info); 9101 9102 // Call the runtime API __tgt_mapper_num_components to get the number of 9103 // pre-existing components. 9104 llvm::Value *OffloadingArgs[] = {Handle}; 9105 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9106 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9107 OMPRTL___tgt_mapper_num_components), 9108 OffloadingArgs); 9109 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9110 PreviousSize, 9111 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9112 9113 // Fill up the runtime mapper handle for all components. 
9114 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9115 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9116 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9117 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9118 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9119 llvm::Value *CurSizeArg = Info.Sizes[I]; 9120 9121 // Extract the MEMBER_OF field from the map type. 9122 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9123 MapperCGF.EmitBlock(MemberBB); 9124 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9125 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9126 OriMapType, 9127 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9128 llvm::BasicBlock *MemberCombineBB = 9129 MapperCGF.createBasicBlock("omp.member.combine"); 9130 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9131 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9132 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9133 // Add the number of pre-existing components to the MEMBER_OF field if it 9134 // is valid. 9135 MapperCGF.EmitBlock(MemberCombineBB); 9136 llvm::Value *CombinedMember = 9137 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9138 // Do nothing if it is not a member of previous components. 9139 MapperCGF.EmitBlock(TypeBB); 9140 llvm::PHINode *MemberMapType = 9141 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9142 MemberMapType->addIncoming(OriMapType, MemberBB); 9143 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9144 9145 // Combine the map type inherited from user-defined mapper with that 9146 // specified in the program. 
According to the OMP_MAP_TO and OMP_MAP_FROM 9147 // bits of the \a MapType, which is the input argument of the mapper 9148 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9149 // bits of MemberMapType. 9150 // [OpenMP 5.0], 1.2.6. map-type decay. 9151 // | alloc | to | from | tofrom | release | delete 9152 // ---------------------------------------------------------- 9153 // alloc | alloc | alloc | alloc | alloc | release | delete 9154 // to | alloc | to | alloc | to | release | delete 9155 // from | alloc | alloc | from | from | release | delete 9156 // tofrom | alloc | to | from | tofrom | release | delete 9157 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9158 MapType, 9159 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9160 MappableExprsHandler::OMP_MAP_FROM)); 9161 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9162 llvm::BasicBlock *AllocElseBB = 9163 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9164 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9165 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9166 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9167 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9168 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9169 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9170 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
9171 MapperCGF.EmitBlock(AllocBB); 9172 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9173 MemberMapType, 9174 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9175 MappableExprsHandler::OMP_MAP_FROM))); 9176 MapperCGF.Builder.CreateBr(EndBB); 9177 MapperCGF.EmitBlock(AllocElseBB); 9178 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9179 LeftToFrom, 9180 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9181 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9182 // In case of to, clear OMP_MAP_FROM. 9183 MapperCGF.EmitBlock(ToBB); 9184 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9185 MemberMapType, 9186 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9187 MapperCGF.Builder.CreateBr(EndBB); 9188 MapperCGF.EmitBlock(ToElseBB); 9189 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9190 LeftToFrom, 9191 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9192 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9193 // In case of from, clear OMP_MAP_TO. 9194 MapperCGF.EmitBlock(FromBB); 9195 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9196 MemberMapType, 9197 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9198 // In case of tofrom, do nothing. 9199 MapperCGF.EmitBlock(EndBB); 9200 LastBB = EndBB; 9201 llvm::PHINode *CurMapType = 9202 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9203 CurMapType->addIncoming(AllocMapType, AllocBB); 9204 CurMapType->addIncoming(ToMapType, ToBB); 9205 CurMapType->addIncoming(FromMapType, FromBB); 9206 CurMapType->addIncoming(MemberMapType, ToElseBB); 9207 9208 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9209 CurSizeArg, CurMapType}; 9210 if (Info.Mappers[I]) { 9211 // Call the corresponding mapper function. 
9212 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9213 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9214 assert(MapperFunc && "Expect a valid mapper function is available."); 9215 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9216 } else { 9217 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9218 // data structure. 9219 MapperCGF.EmitRuntimeCall( 9220 OMPBuilder.getOrCreateRuntimeFunction( 9221 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9222 OffloadingArgs); 9223 } 9224 } 9225 9226 // Update the pointer to point to the next element that needs to be mapped, 9227 // and check whether we have mapped all elements. 9228 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9229 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9230 PtrPHI->addIncoming(PtrNext, LastBB); 9231 llvm::Value *IsDone = 9232 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9233 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9234 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9235 9236 MapperCGF.EmitBlock(ExitBB); 9237 // Emit array deletion if this is an array section and \p MapType indicates 9238 // that deletion is required. 9239 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9240 ElementSize, DoneBB, /*IsInit=*/false); 9241 9242 // Emit the function exit block. 9243 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9244 MapperCGF.FinishFunction(); 9245 UDMMap.try_emplace(D, Fn); 9246 if (CGF) { 9247 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9248 Decls.second.push_back(D); 9249 } 9250 } 9251 9252 /// Emit the array initialization or deletion portion for user-defined mapper 9253 /// code generation. First, it evaluates whether an array section is mapped and 9254 /// whether the \a MapType instructs to delete this section. 
If \a IsInit is 9255 /// true, and \a MapType indicates to not delete this array, array 9256 /// initialization code is generated. If \a IsInit is false, and \a MapType 9257 /// indicates to not this array, array deletion code is generated. 9258 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9259 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9260 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9261 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9262 StringRef Prefix = IsInit ? ".init" : ".del"; 9263 9264 // Evaluate if this is an array section. 9265 llvm::BasicBlock *IsDeleteBB = 9266 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9267 llvm::BasicBlock *BodyBB = 9268 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9269 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9270 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9271 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9272 9273 // Evaluate if we are going to delete this section. 9274 MapperCGF.EmitBlock(IsDeleteBB); 9275 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9276 MapType, 9277 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9278 llvm::Value *DeleteCond; 9279 if (IsInit) { 9280 DeleteCond = MapperCGF.Builder.CreateIsNull( 9281 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9282 } else { 9283 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9284 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9285 } 9286 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9287 9288 MapperCGF.EmitBlock(BodyBB); 9289 // Get the array size by multiplying element size and element number (i.e., \p 9290 // Size). 
9291 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9292 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9293 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9294 // memory allocation/deletion purpose only. 9295 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9296 MapType, 9297 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9298 MappableExprsHandler::OMP_MAP_FROM))); 9299 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9300 // data structure. 9301 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9302 MapperCGF.EmitRuntimeCall( 9303 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9304 OMPRTL___tgt_push_mapper_component), 9305 OffloadingArgs); 9306 } 9307 9308 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9309 const OMPDeclareMapperDecl *D) { 9310 auto I = UDMMap.find(D); 9311 if (I != UDMMap.end()) 9312 return I->second; 9313 emitUserDefinedMapper(D); 9314 return UDMMap.lookup(D); 9315 } 9316 9317 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9318 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9319 llvm::Value *DeviceID, 9320 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9321 const OMPLoopDirective &D)> 9322 SizeEmitter) { 9323 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9324 const OMPExecutableDirective *TD = &D; 9325 // Get nested teams distribute kind directive, if any. 
9326 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9327 TD = getNestedDistributeDirective(CGM.getContext(), D); 9328 if (!TD) 9329 return; 9330 const auto *LD = cast<OMPLoopDirective>(TD); 9331 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9332 PrePostActionTy &) { 9333 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9334 llvm::Value *Args[] = {DeviceID, NumIterations}; 9335 CGF.EmitRuntimeCall( 9336 OMPBuilder.getOrCreateRuntimeFunction( 9337 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9338 Args); 9339 } 9340 }; 9341 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9342 } 9343 9344 void CGOpenMPRuntime::emitTargetCall( 9345 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9346 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9347 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9348 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9349 const OMPLoopDirective &D)> 9350 SizeEmitter) { 9351 if (!CGF.HaveInsertPoint()) 9352 return; 9353 9354 assert(OutlinedFn && "Invalid outlined function!"); 9355 9356 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9357 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9358 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9359 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9360 PrePostActionTy &) { 9361 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9362 }; 9363 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9364 9365 CodeGenFunction::OMPTargetDataInfo InputInfo; 9366 llvm::Value *MapTypesArray = nullptr; 9367 // Fill up the pointer arrays and transfer execution to the device. 
9368 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9369 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9370 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9371 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9372 // Reverse offloading is not supported, so just execute on the host. 9373 if (RequiresOuterTask) { 9374 CapturedVars.clear(); 9375 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9376 } 9377 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9378 return; 9379 } 9380 9381 // On top of the arrays that were filled up, the target offloading call 9382 // takes as arguments the device id as well as the host pointer. The host 9383 // pointer is used by the runtime library to identify the current target 9384 // region, so it only has to be unique and not necessarily point to 9385 // anything. It could be the pointer to the outlined function that 9386 // implements the target region, but we aren't using that so that the 9387 // compiler doesn't need to keep that, and could therefore inline the host 9388 // function if proven worthwhile during optimization. 9389 9390 // From this point on, we need to have an ID of the target region defined. 9391 assert(OutlinedFnID && "Invalid outlined function ID!"); 9392 9393 // Emit device ID if any. 9394 llvm::Value *DeviceID; 9395 if (Device.getPointer()) { 9396 assert((Device.getInt() == OMPC_DEVICE_unknown || 9397 Device.getInt() == OMPC_DEVICE_device_num) && 9398 "Expected device_num modifier."); 9399 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9400 DeviceID = 9401 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9402 } else { 9403 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9404 } 9405 9406 // Emit the number of elements in the offloading arrays. 9407 llvm::Value *PointerNum = 9408 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9409 9410 // Return value of the runtime offloading call. 
9411 llvm::Value *Return; 9412 9413 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9414 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9415 9416 // Emit tripcount for the target loop-based directive. 9417 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9418 9419 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9420 // The target region is an outlined function launched by the runtime 9421 // via calls __tgt_target() or __tgt_target_teams(). 9422 // 9423 // __tgt_target() launches a target region with one team and one thread, 9424 // executing a serial region. This master thread may in turn launch 9425 // more threads within its team upon encountering a parallel region, 9426 // however, no additional teams can be launched on the device. 9427 // 9428 // __tgt_target_teams() launches a target region with one or more teams, 9429 // each with one or more threads. This call is required for target 9430 // constructs such as: 9431 // 'target teams' 9432 // 'target' / 'teams' 9433 // 'target teams distribute parallel for' 9434 // 'target parallel' 9435 // and so on. 9436 // 9437 // Note that on the host and CPU targets, the runtime implementation of 9438 // these calls simply call the outlined function without forking threads. 9439 // The outlined functions themselves have runtime calls to 9440 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9441 // the compiler in emitTeamsCall() and emitParallelCall(). 9442 // 9443 // In contrast, on the NVPTX target, the implementation of 9444 // __tgt_target_teams() launches a GPU kernel with the requested number 9445 // of teams and threads so no additional calls to the runtime are required. 9446 if (NumTeams) { 9447 // If we have NumTeams defined this means that we have an enclosed teams 9448 // region. Therefore we also expect to have NumThreads defined. 
These two 9449 // values should be defined in the presence of a teams directive, 9450 // regardless of having any clauses associated. If the user is using teams 9451 // but no clauses, these two values will be the default that should be 9452 // passed to the runtime library - a 32-bit integer with the value zero. 9453 assert(NumThreads && "Thread limit expression should be available along " 9454 "with number of teams."); 9455 llvm::Value *OffloadingArgs[] = {DeviceID, 9456 OutlinedFnID, 9457 PointerNum, 9458 InputInfo.BasePointersArray.getPointer(), 9459 InputInfo.PointersArray.getPointer(), 9460 InputInfo.SizesArray.getPointer(), 9461 MapTypesArray, 9462 InputInfo.MappersArray.getPointer(), 9463 NumTeams, 9464 NumThreads}; 9465 Return = CGF.EmitRuntimeCall( 9466 OMPBuilder.getOrCreateRuntimeFunction( 9467 CGM.getModule(), HasNowait 9468 ? OMPRTL___tgt_target_teams_nowait_mapper 9469 : OMPRTL___tgt_target_teams_mapper), 9470 OffloadingArgs); 9471 } else { 9472 llvm::Value *OffloadingArgs[] = {DeviceID, 9473 OutlinedFnID, 9474 PointerNum, 9475 InputInfo.BasePointersArray.getPointer(), 9476 InputInfo.PointersArray.getPointer(), 9477 InputInfo.SizesArray.getPointer(), 9478 MapTypesArray, 9479 InputInfo.MappersArray.getPointer()}; 9480 Return = CGF.EmitRuntimeCall( 9481 OMPBuilder.getOrCreateRuntimeFunction( 9482 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 9483 : OMPRTL___tgt_target_mapper), 9484 OffloadingArgs); 9485 } 9486 9487 // Check the error code and execute the host version if required. 
9488 llvm::BasicBlock *OffloadFailedBlock = 9489 CGF.createBasicBlock("omp_offload.failed"); 9490 llvm::BasicBlock *OffloadContBlock = 9491 CGF.createBasicBlock("omp_offload.cont"); 9492 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9493 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9494 9495 CGF.EmitBlock(OffloadFailedBlock); 9496 if (RequiresOuterTask) { 9497 CapturedVars.clear(); 9498 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9499 } 9500 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9501 CGF.EmitBranch(OffloadContBlock); 9502 9503 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9504 }; 9505 9506 // Notify that the host version must be executed. 9507 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9508 RequiresOuterTask](CodeGenFunction &CGF, 9509 PrePostActionTy &) { 9510 if (RequiresOuterTask) { 9511 CapturedVars.clear(); 9512 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9513 } 9514 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9515 }; 9516 9517 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9518 &CapturedVars, RequiresOuterTask, 9519 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9520 // Fill up the arrays with all the captured variables. 9521 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9522 9523 // Get mappable expression information. 
9524 MappableExprsHandler MEHandler(D, CGF); 9525 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9526 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9527 9528 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9529 auto CV = CapturedVars.begin(); 9530 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9531 CE = CS.capture_end(); 9532 CI != CE; ++CI, ++RI, ++CV) { 9533 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9534 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9535 9536 // VLA sizes are passed to the outlined region by copy and do not have map 9537 // information associated. 9538 if (CI->capturesVariableArrayType()) { 9539 CurInfo.BasePointers.push_back(*CV); 9540 CurInfo.Pointers.push_back(*CV); 9541 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9542 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9543 // Copy to the device as an argument. No need to retrieve it. 9544 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9545 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9546 MappableExprsHandler::OMP_MAP_IMPLICIT); 9547 CurInfo.Mappers.push_back(nullptr); 9548 } else { 9549 // If we have any information in the map clause, we use it, otherwise we 9550 // just do a default mapping. 9551 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 9552 if (!CI->capturesThis()) 9553 MappedVarSet.insert(CI->getCapturedVar()); 9554 else 9555 MappedVarSet.insert(nullptr); 9556 if (CurInfo.BasePointers.empty()) 9557 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 9558 // Generate correct mapping for variables captured by reference in 9559 // lambdas. 9560 if (CI->capturesVariable()) 9561 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 9562 CurInfo, LambdaPointers); 9563 } 9564 // We expect to have at least an element of information for this capture. 
9565 assert(!CurInfo.BasePointers.empty() && 9566 "Non-existing map pointer for capture!"); 9567 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9568 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9569 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9570 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9571 "Inconsistent map information sizes!"); 9572 9573 // If there is an entry in PartialStruct it means we have a struct with 9574 // individual members mapped. Emit an extra combined entry. 9575 if (PartialStruct.Base.isValid()) 9576 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 9577 9578 // We need to append the results of this capture to what we already have. 9579 CombinedInfo.append(CurInfo); 9580 } 9581 // Adjust MEMBER_OF flags for the lambdas captures. 9582 MEHandler.adjustMemberOfForLambdaCaptures( 9583 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 9584 CombinedInfo.Types); 9585 // Map any list items in a map clause that were not captures because they 9586 // weren't referenced within the construct. 9587 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 9588 9589 TargetDataInfo Info; 9590 // Fill up the arrays and create the arguments. 
// Still inside the TargetThenGen lambda: emit the offloading arrays for
// CombinedInfo, then publish them through InputInfo and MapTypesArray (both
// captured by reference) so that ThenGen can pass them to the runtime call.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // Run ThenGen either wrapped in a target task or as an inlined directive.
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-fallback counterpart of TargetThenGen: runs ElseGen, again either
  // through the task-based path (with a default-constructed InputInfo) or as
  // an inlined directive.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit the device function for each one that has a registered target region
/// entry.
///
/// \param S Statement to scan; a null statement is ignored.
/// \param ParentName Name forwarded to the target region entry lookup and to
/// the device function emitters.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The entry is looked up by the (device, file, line) triple computed from
    // the directive's begin location, together with ParentName.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the emitter matching the concrete target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Every remaining kind is listed explicitly; none of them is expected here
    // because RequiresDeviceCodegen already restricted the directive to a
    // target execution directive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Any other OpenMP executable directive: continue the scan in the statement
  // captured by its innermost captured statement, if it has one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Decide how the function-like declaration \p GD is handled with respect to
/// offloading. As the comments on the individual returns below describe, a
/// result of true is used for the "do not emit" cases.
///
/// When compiling for the device, the function body is additionally scanned
/// for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
9807 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9808 return true; 9809 } 9810 9811 // Do not to emit function if it is not marked as declare target. 9812 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9813 AlreadyEmittedTargetDecls.count(VD) == 0; 9814 } 9815 9816 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9817 if (!CGM.getLangOpts().OpenMPIsDevice) 9818 return false; 9819 9820 // Check if there are Ctors/Dtors in this declaration and look for target 9821 // regions in it. We use the complete variant to produce the kernel name 9822 // mangling. 9823 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9824 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9825 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9826 StringRef ParentName = 9827 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9828 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9829 } 9830 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9831 StringRef ParentName = 9832 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9833 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9834 } 9835 } 9836 9837 // Do not to emit variable if it is not marked as declare target. 
9838 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9839 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9840 cast<VarDecl>(GD.getDecl())); 9841 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9842 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9843 HasRequiresUnifiedSharedMemory)) { 9844 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9845 return true; 9846 } 9847 return false; 9848 } 9849 9850 llvm::Constant * 9851 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9852 const VarDecl *VD) { 9853 assert(VD->getType().isConstant(CGM.getContext()) && 9854 "Expected constant variable."); 9855 StringRef VarName; 9856 llvm::Constant *Addr; 9857 llvm::GlobalValue::LinkageTypes Linkage; 9858 QualType Ty = VD->getType(); 9859 SmallString<128> Buffer; 9860 { 9861 unsigned DeviceID; 9862 unsigned FileID; 9863 unsigned Line; 9864 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9865 FileID, Line); 9866 llvm::raw_svector_ostream OS(Buffer); 9867 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9868 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9869 VarName = OS.str(); 9870 } 9871 Linkage = llvm::GlobalValue::InternalLinkage; 9872 Addr = 9873 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9874 getDefaultFirstprivateAddressSpace()); 9875 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9876 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9877 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9878 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9879 VarName, Addr, VarSize, 9880 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9881 return Addr; 9882 } 9883 9884 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9885 llvm::Constant *Addr) { 9886 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9887 !CGM.getLangOpts().OpenMPIsDevice) 9888 return; 9889 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9890 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9891 if (!Res) { 9892 if (CGM.getLangOpts().OpenMPIsDevice) { 9893 // Register non-target variables being emitted in device code (debug info 9894 // may cause this). 9895 StringRef VarName = CGM.getMangledName(VD); 9896 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9897 } 9898 return; 9899 } 9900 // Register declare target variables. 9901 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9902 StringRef VarName; 9903 CharUnits VarSize; 9904 llvm::GlobalValue::LinkageTypes Linkage; 9905 9906 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9907 !HasRequiresUnifiedSharedMemory) { 9908 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9909 VarName = CGM.getMangledName(VD); 9910 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9911 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9912 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9913 } else { 9914 VarSize = CharUnits::Zero(); 9915 } 9916 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9917 // Temp solution to prevent optimizations of the internal variables. 
9918 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9919 std::string RefName = getName({VarName, "ref"}); 9920 if (!CGM.GetGlobalValue(RefName)) { 9921 llvm::Constant *AddrRef = 9922 getOrCreateInternalVariable(Addr->getType(), RefName); 9923 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9924 GVAddrRef->setConstant(/*Val=*/true); 9925 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9926 GVAddrRef->setInitializer(Addr); 9927 CGM.addCompilerUsedGlobal(GVAddrRef); 9928 } 9929 } 9930 } else { 9931 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9932 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9933 HasRequiresUnifiedSharedMemory)) && 9934 "Declare target attribute must link or to with unified memory."); 9935 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9936 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9937 else 9938 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9939 9940 if (CGM.getLangOpts().OpenMPIsDevice) { 9941 VarName = Addr->getName(); 9942 Addr = nullptr; 9943 } else { 9944 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9945 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9946 } 9947 VarSize = CGM.getPointerSize(); 9948 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9949 } 9950 9951 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9952 VarName, Addr, VarSize, Flags, Linkage); 9953 } 9954 9955 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9956 if (isa<FunctionDecl>(GD.getDecl()) || 9957 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9958 return emitTargetFunctions(GD); 9959 9960 return emitTargetGlobalVariable(GD); 9961 } 9962 9963 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9964 for (const VarDecl *VD : DeferredGlobalVariables) { 9965 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9966 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9967 if (!Res) 9968 continue; 9969 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9970 !HasRequiresUnifiedSharedMemory) { 9971 CGM.EmitGlobal(VD); 9972 } else { 9973 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9974 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9975 HasRequiresUnifiedSharedMemory)) && 9976 "Expected link clause or to clause with unified memory."); 9977 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9978 } 9979 } 9980 } 9981 9982 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9983 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9984 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9985 " Expected target-based directive."); 9986 } 9987 9988 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9989 for (const OMPClause *Clause : D->clauselists()) { 9990 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9991 HasRequiresUnifiedSharedMemory = true; 9992 } else if (const auto *AC = 9993 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9994 switch (AC->getAtomicDefaultMemOrderKind()) { 9995 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9996 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9997 break; 9998 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9999 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10000 break; 10001 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10002 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10003 break; 10004 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10005 break; 10006 } 10007 } 10008 } 10009 } 10010 10011 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10012 return RequiresAtomicOrdering; 10013 } 10014 10015 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10016 LangAS &AS) { 10017 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10018 return false; 10019 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10020 switch(A->getAllocatorType()) { 10021 case 
OMPAllocateDeclAttr::OMPNullMemAlloc: 10022 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10023 // Not supported, fallback to the default mem space. 10024 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10025 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10026 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10027 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10028 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10029 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10030 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10031 AS = LangAS::Default; 10032 return true; 10033 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10034 llvm_unreachable("Expected predefined allocator for the variables with the " 10035 "static storage."); 10036 } 10037 return false; 10038 } 10039 10040 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10041 return HasRequiresUnifiedSharedMemory; 10042 } 10043 10044 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10045 CodeGenModule &CGM) 10046 : CGM(CGM) { 10047 if (CGM.getLangOpts().OpenMPIsDevice) { 10048 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10049 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10050 } 10051 } 10052 10053 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10054 if (CGM.getLangOpts().OpenMPIsDevice) 10055 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10056 } 10057 10058 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10059 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10060 return true; 10061 10062 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10063 // Do not to emit function if it is marked as declare target as it was already 10064 // emitted. 
// Declare target functions are never inserted into AlreadyEmittedTargetDecls
// here; the set is only queried for them.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If an LLVM function with the mangled name already exists, the answer
      // depends on whether it has a definition.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Any other function: remember it, and return true only if it had already
  // been recorded before this call.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the function that passes the requires flags to the offloading
/// runtime through __tgt_register_requires.
///
/// \return The created function, or nullptr when no registration is needed:
/// no target triples, simd-only mode, device compilation, or nothing
/// target-related was emitted.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // The flags are passed to the runtime as a 64-bit integer constant.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit the call to __kmpc_fork_teams that runs \p OutlinedFn with
/// \p CapturedVars as its trailing arguments.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param D Unused by this implementation.
/// \param Loc Source location used for the runtime location argument.
/// \param OutlinedFn Outlined function, passed bitcast to the microtask
/// pointer type.
/// \param CapturedVars Values forwarded after the three fixed arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  // Fixed arguments first, then one argument per captured variable.
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit the call to __kmpc_push_num_teams with the values of the num_teams and
/// thread_limit expressions.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param NumTeams Expression for the number of teams; a constant 0 is passed
/// when null.
/// \param ThreadLimit Expression for the thread limit; a constant 0 is passed
/// when null.
/// \param Loc Source location used for the location and thread id arguments.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Both values are cast to signed 32-bit integers.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit a data region: a call to __tgt_target_data_begin_mapper, the region
/// body produced by \p CodeGen, and a call to __tgt_target_data_end_mapper.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param D Directive whose map clauses populate the offloading arrays.
/// \param IfCond Optional if-clause condition guarding both runtime calls.
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is passed when
/// null.
/// \param CodeGen Generator for the region body.
/// \param Info Filled in when the region is opened and read again when it is
/// closed.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/false);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, guarded by the if clause when there is one.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, guarded by the same if clause.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone data directive: target enter data,
/// target exit data, or target update.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
10332 llvm::Constant *PointerNum = 10333 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10334 10335 llvm::Value *OffloadingArgs[] = {DeviceID, 10336 PointerNum, 10337 InputInfo.BasePointersArray.getPointer(), 10338 InputInfo.PointersArray.getPointer(), 10339 InputInfo.SizesArray.getPointer(), 10340 MapTypesArray, 10341 InputInfo.MappersArray.getPointer()}; 10342 10343 // Select the right runtime function call for each standalone 10344 // directive. 10345 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10346 RuntimeFunction RTLFn; 10347 switch (D.getDirectiveKind()) { 10348 case OMPD_target_enter_data: 10349 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10350 : OMPRTL___tgt_target_data_begin_mapper; 10351 break; 10352 case OMPD_target_exit_data: 10353 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10354 : OMPRTL___tgt_target_data_end_mapper; 10355 break; 10356 case OMPD_target_update: 10357 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10358 : OMPRTL___tgt_target_data_update_mapper; 10359 break; 10360 case OMPD_parallel: 10361 case OMPD_for: 10362 case OMPD_parallel_for: 10363 case OMPD_parallel_master: 10364 case OMPD_parallel_sections: 10365 case OMPD_for_simd: 10366 case OMPD_parallel_for_simd: 10367 case OMPD_cancel: 10368 case OMPD_cancellation_point: 10369 case OMPD_ordered: 10370 case OMPD_threadprivate: 10371 case OMPD_allocate: 10372 case OMPD_task: 10373 case OMPD_simd: 10374 case OMPD_sections: 10375 case OMPD_section: 10376 case OMPD_single: 10377 case OMPD_master: 10378 case OMPD_critical: 10379 case OMPD_taskyield: 10380 case OMPD_barrier: 10381 case OMPD_taskwait: 10382 case OMPD_taskgroup: 10383 case OMPD_atomic: 10384 case OMPD_flush: 10385 case OMPD_depobj: 10386 case OMPD_scan: 10387 case OMPD_teams: 10388 case OMPD_target_data: 10389 case OMPD_distribute: 10390 case OMPD_distribute_simd: 10391 case OMPD_distribute_parallel_for: 10392 case OMPD_distribute_parallel_for_simd: 
10393 case OMPD_teams_distribute: 10394 case OMPD_teams_distribute_simd: 10395 case OMPD_teams_distribute_parallel_for: 10396 case OMPD_teams_distribute_parallel_for_simd: 10397 case OMPD_declare_simd: 10398 case OMPD_declare_variant: 10399 case OMPD_begin_declare_variant: 10400 case OMPD_end_declare_variant: 10401 case OMPD_declare_target: 10402 case OMPD_end_declare_target: 10403 case OMPD_declare_reduction: 10404 case OMPD_declare_mapper: 10405 case OMPD_taskloop: 10406 case OMPD_taskloop_simd: 10407 case OMPD_master_taskloop: 10408 case OMPD_master_taskloop_simd: 10409 case OMPD_parallel_master_taskloop: 10410 case OMPD_parallel_master_taskloop_simd: 10411 case OMPD_target: 10412 case OMPD_target_simd: 10413 case OMPD_target_teams_distribute: 10414 case OMPD_target_teams_distribute_simd: 10415 case OMPD_target_teams_distribute_parallel_for: 10416 case OMPD_target_teams_distribute_parallel_for_simd: 10417 case OMPD_target_teams: 10418 case OMPD_target_parallel: 10419 case OMPD_target_parallel_for: 10420 case OMPD_target_parallel_for_simd: 10421 case OMPD_requires: 10422 case OMPD_unknown: 10423 default: 10424 llvm_unreachable("Unexpected standalone target data directive."); 10425 break; 10426 } 10427 CGF.EmitRuntimeCall( 10428 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10429 OffloadingArgs); 10430 }; 10431 10432 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10433 CodeGenFunction &CGF, PrePostActionTy &) { 10434 // Fill up the arrays with all the mapped variables. 10435 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10436 10437 // Get map clause information. 10438 MappableExprsHandler MEHandler(D, CGF); 10439 MEHandler.generateAllInfo(CombinedInfo); 10440 10441 TargetDataInfo Info; 10442 // Fill up the arrays and create the arguments. 
10443 emitOffloadingArrays(CGF, CombinedInfo, Info); 10444 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10445 Info.PointersArray, Info.SizesArray, 10446 Info.MapTypesArray, Info.MappersArray, Info); 10447 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10448 InputInfo.BasePointersArray = 10449 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10450 InputInfo.PointersArray = 10451 Address(Info.PointersArray, CGM.getPointerAlign()); 10452 InputInfo.SizesArray = 10453 Address(Info.SizesArray, CGM.getPointerAlign()); 10454 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10455 MapTypesArray = Info.MapTypesArray; 10456 if (D.hasClausesOfKind<OMPDependClause>()) 10457 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10458 else 10459 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10460 }; 10461 10462 if (IfCond) { 10463 emitIfClause(CGF, IfCond, TargetThenGen, 10464 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10465 } else { 10466 RegionCodeGenTy ThenRCG(TargetThenGen); 10467 ThenRCG(CGF); 10468 } 10469 } 10470 10471 namespace { 10472 /// Kind of parameter in a function with 'declare simd' directive. 10473 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10474 /// Attribute set of the parameter. 10475 struct ParamAttrTy { 10476 ParamKindTy Kind = Vector; 10477 llvm::APSInt StrideOrArg; 10478 llvm::APSInt Alignment; 10479 }; 10480 } // namespace 10481 10482 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10483 ArrayRef<ParamAttrTy> ParamAttrs) { 10484 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10485 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10486 // of that clause. The VLEN value must be power of 2. 10487 // In other case the notion of the function`s "characteristic data type" (CDT) 10488 // is used to compute the vector length. 
10489 // CDT is defined in the following order: 10490 // a) For non-void function, the CDT is the return type. 10491 // b) If the function has any non-uniform, non-linear parameters, then the 10492 // CDT is the type of the first such parameter. 10493 // c) If the CDT determined by a) or b) above is struct, union, or class 10494 // type which is pass-by-value (except for the type that maps to the 10495 // built-in complex data type), the characteristic data type is int. 10496 // d) If none of the above three cases is applicable, the CDT is int. 10497 // The VLEN is then determined based on the CDT and the size of vector 10498 // register of that ISA for which current vector version is generated. The 10499 // VLEN is computed using the formula below: 10500 // VLEN = sizeof(vector_register) / sizeof(CDT), 10501 // where vector register size specified in section 3.2.1 Registers and the 10502 // Stack Frame of original AMD64 ABI document. 10503 QualType RetType = FD->getReturnType(); 10504 if (RetType.isNull()) 10505 return 0; 10506 ASTContext &C = FD->getASTContext(); 10507 QualType CDT; 10508 if (!RetType.isNull() && !RetType->isVoidType()) { 10509 CDT = RetType; 10510 } else { 10511 unsigned Offset = 0; 10512 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10513 if (ParamAttrs[Offset].Kind == Vector) 10514 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10515 ++Offset; 10516 } 10517 if (CDT.isNull()) { 10518 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10519 if (ParamAttrs[I + Offset].Kind == Vector) { 10520 CDT = FD->getParamDecl(I)->getType(); 10521 break; 10522 } 10523 } 10524 } 10525 } 10526 if (CDT.isNull()) 10527 CDT = C.IntTy; 10528 CDT = CDT->getCanonicalTypeUnqualified(); 10529 if (CDT->isRecordType() || CDT->isUnionType()) 10530 CDT = C.IntTy; 10531 return C.getTypeSize(CDT); 10532 } 10533 10534 static void 10535 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10536 const llvm::APSInt &VLENVal, 10537 
ArrayRef<ParamAttrTy> ParamAttrs, 10538 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10539 struct ISADataTy { 10540 char ISA; 10541 unsigned VecRegSize; 10542 }; 10543 ISADataTy ISAData[] = { 10544 { 10545 'b', 128 10546 }, // SSE 10547 { 10548 'c', 256 10549 }, // AVX 10550 { 10551 'd', 256 10552 }, // AVX2 10553 { 10554 'e', 512 10555 }, // AVX512 10556 }; 10557 llvm::SmallVector<char, 2> Masked; 10558 switch (State) { 10559 case OMPDeclareSimdDeclAttr::BS_Undefined: 10560 Masked.push_back('N'); 10561 Masked.push_back('M'); 10562 break; 10563 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10564 Masked.push_back('N'); 10565 break; 10566 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10567 Masked.push_back('M'); 10568 break; 10569 } 10570 for (char Mask : Masked) { 10571 for (const ISADataTy &Data : ISAData) { 10572 SmallString<256> Buffer; 10573 llvm::raw_svector_ostream Out(Buffer); 10574 Out << "_ZGV" << Data.ISA << Mask; 10575 if (!VLENVal) { 10576 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10577 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10578 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10579 } else { 10580 Out << VLENVal; 10581 } 10582 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10583 switch (ParamAttr.Kind){ 10584 case LinearWithVarStride: 10585 Out << 's' << ParamAttr.StrideOrArg; 10586 break; 10587 case Linear: 10588 Out << 'l'; 10589 if (ParamAttr.StrideOrArg != 1) 10590 Out << ParamAttr.StrideOrArg; 10591 break; 10592 case Uniform: 10593 Out << 'u'; 10594 break; 10595 case Vector: 10596 Out << 'v'; 10597 break; 10598 } 10599 if (!!ParamAttr.Alignment) 10600 Out << 'a' << ParamAttr.Alignment; 10601 } 10602 Out << '_' << Fn->getName(); 10603 Fn->addFnAttr(Out.str()); 10604 } 10605 } 10606 } 10607 10608 // This are the Functions that are needed to mangle the name of the 10609 // vector functions generated by the compiler, according to the rules 10610 // defined in the "Vector Function ABI specifications for AArch64", 
10611 // available at 10612 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10613 10614 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10615 /// 10616 /// TODO: Need to implement the behavior for reference marked with a 10617 /// var or no linear modifiers (1.b in the section). For this, we 10618 /// need to extend ParamKindTy to support the linear modifiers. 10619 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10620 QT = QT.getCanonicalType(); 10621 10622 if (QT->isVoidType()) 10623 return false; 10624 10625 if (Kind == ParamKindTy::Uniform) 10626 return false; 10627 10628 if (Kind == ParamKindTy::Linear) 10629 return false; 10630 10631 // TODO: Handle linear references with modifiers 10632 10633 if (Kind == ParamKindTy::LinearWithVarStride) 10634 return false; 10635 10636 return true; 10637 } 10638 10639 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10640 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10641 QT = QT.getCanonicalType(); 10642 unsigned Size = C.getTypeSize(QT); 10643 10644 // Only scalars and complex within 16 bytes wide set PVB to true. 10645 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10646 return false; 10647 10648 if (QT->isFloatingType()) 10649 return true; 10650 10651 if (QT->isIntegerType()) 10652 return true; 10653 10654 if (QT->isPointerType()) 10655 return true; 10656 10657 // TODO: Add support for complex types (section 3.1.2, item 2). 10658 10659 return false; 10660 } 10661 10662 /// Computes the lane size (LS) of a return type or of an input parameter, 10663 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10664 /// TODO: Add support for references, section 3.2.1, item 1. 
10665 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10666 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10667 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10668 if (getAArch64PBV(PTy, C)) 10669 return C.getTypeSize(PTy); 10670 } 10671 if (getAArch64PBV(QT, C)) 10672 return C.getTypeSize(QT); 10673 10674 return C.getTypeSize(C.getUIntPtrType()); 10675 } 10676 10677 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10678 // signature of the scalar function, as defined in 3.2.2 of the 10679 // AAVFABI. 10680 static std::tuple<unsigned, unsigned, bool> 10681 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10682 QualType RetType = FD->getReturnType().getCanonicalType(); 10683 10684 ASTContext &C = FD->getASTContext(); 10685 10686 bool OutputBecomesInput = false; 10687 10688 llvm::SmallVector<unsigned, 8> Sizes; 10689 if (!RetType->isVoidType()) { 10690 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10691 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10692 OutputBecomesInput = true; 10693 } 10694 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10695 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10696 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10697 } 10698 10699 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10700 // The LS of a function parameter / return value can only be a power 10701 // of 2, starting from 8 bits, up to 128. 
10702 assert(std::all_of(Sizes.begin(), Sizes.end(), 10703 [](unsigned Size) { 10704 return Size == 8 || Size == 16 || Size == 32 || 10705 Size == 64 || Size == 128; 10706 }) && 10707 "Invalid size"); 10708 10709 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10710 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10711 OutputBecomesInput); 10712 } 10713 10714 /// Mangle the parameter part of the vector function name according to 10715 /// their OpenMP classification. The mangling function is defined in 10716 /// section 3.5 of the AAVFABI. 10717 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10718 SmallString<256> Buffer; 10719 llvm::raw_svector_ostream Out(Buffer); 10720 for (const auto &ParamAttr : ParamAttrs) { 10721 switch (ParamAttr.Kind) { 10722 case LinearWithVarStride: 10723 Out << "ls" << ParamAttr.StrideOrArg; 10724 break; 10725 case Linear: 10726 Out << 'l'; 10727 // Don't print the step value if it is not present or if it is 10728 // equal to 1. 10729 if (ParamAttr.StrideOrArg != 1) 10730 Out << ParamAttr.StrideOrArg; 10731 break; 10732 case Uniform: 10733 Out << 'u'; 10734 break; 10735 case Vector: 10736 Out << 'v'; 10737 break; 10738 } 10739 10740 if (!!ParamAttr.Alignment) 10741 Out << 'a' << ParamAttr.Alignment; 10742 } 10743 10744 return std::string(Out.str()); 10745 } 10746 10747 // Function used to add the attribute. The parameter `VLEN` is 10748 // templated to allow the use of "x" when targeting scalable functions 10749 // for SVE. 
10750 template <typename T> 10751 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10752 char ISA, StringRef ParSeq, 10753 StringRef MangledName, bool OutputBecomesInput, 10754 llvm::Function *Fn) { 10755 SmallString<256> Buffer; 10756 llvm::raw_svector_ostream Out(Buffer); 10757 Out << Prefix << ISA << LMask << VLEN; 10758 if (OutputBecomesInput) 10759 Out << "v"; 10760 Out << ParSeq << "_" << MangledName; 10761 Fn->addFnAttr(Out.str()); 10762 } 10763 10764 // Helper function to generate the Advanced SIMD names depending on 10765 // the value of the NDS when simdlen is not present. 10766 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10767 StringRef Prefix, char ISA, 10768 StringRef ParSeq, StringRef MangledName, 10769 bool OutputBecomesInput, 10770 llvm::Function *Fn) { 10771 switch (NDS) { 10772 case 8: 10773 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10774 OutputBecomesInput, Fn); 10775 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10776 OutputBecomesInput, Fn); 10777 break; 10778 case 16: 10779 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10780 OutputBecomesInput, Fn); 10781 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10782 OutputBecomesInput, Fn); 10783 break; 10784 case 32: 10785 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10786 OutputBecomesInput, Fn); 10787 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10788 OutputBecomesInput, Fn); 10789 break; 10790 case 64: 10791 case 128: 10792 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10793 OutputBecomesInput, Fn); 10794 break; 10795 default: 10796 llvm_unreachable("Scalar type is too wide."); 10797 } 10798 } 10799 10800 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10801 static void emitAArch64DeclareSimdFunction( 10802 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10803 ArrayRef<ParamAttrTy> ParamAttrs, 10804 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10805 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10806 10807 // Get basic data for building the vector signature. 10808 const auto Data = getNDSWDS(FD, ParamAttrs); 10809 const unsigned NDS = std::get<0>(Data); 10810 const unsigned WDS = std::get<1>(Data); 10811 const bool OutputBecomesInput = std::get<2>(Data); 10812 10813 // Check the values provided via `simdlen` by the user. 10814 // 1. A `simdlen(1)` doesn't produce vector signatures, 10815 if (UserVLEN == 1) { 10816 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10817 DiagnosticsEngine::Warning, 10818 "The clause simdlen(1) has no effect when targeting aarch64."); 10819 CGM.getDiags().Report(SLoc, DiagID); 10820 return; 10821 } 10822 10823 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10824 // Advanced SIMD output. 10825 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10826 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10827 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10828 "power of 2 when targeting Advanced SIMD."); 10829 CGM.getDiags().Report(SLoc, DiagID); 10830 return; 10831 } 10832 10833 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10834 // limits. 10835 if (ISA == 's' && UserVLEN != 0) { 10836 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10837 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10838 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10839 "lanes in the architectural constraints " 10840 "for SVE (min is 128-bit, max is " 10841 "2048-bit, by steps of 128-bit)"); 10842 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10843 return; 10844 } 10845 } 10846 10847 // Sort out parameter sequence. 
10848 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10849 StringRef Prefix = "_ZGV"; 10850 // Generate simdlen from user input (if any). 10851 if (UserVLEN) { 10852 if (ISA == 's') { 10853 // SVE generates only a masked function. 10854 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10855 OutputBecomesInput, Fn); 10856 } else { 10857 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10858 // Advanced SIMD generates one or two functions, depending on 10859 // the `[not]inbranch` clause. 10860 switch (State) { 10861 case OMPDeclareSimdDeclAttr::BS_Undefined: 10862 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10863 OutputBecomesInput, Fn); 10864 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10865 OutputBecomesInput, Fn); 10866 break; 10867 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10868 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10869 OutputBecomesInput, Fn); 10870 break; 10871 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10872 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10873 OutputBecomesInput, Fn); 10874 break; 10875 } 10876 } 10877 } else { 10878 // If no user simdlen is provided, follow the AAVFABI rules for 10879 // generating the vector length. 10880 if (ISA == 's') { 10881 // SVE, section 3.4.1, item 1. 10882 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10883 OutputBecomesInput, Fn); 10884 } else { 10885 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10886 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10887 // two vector names depending on the use of the clause 10888 // `[not]inbranch`. 
10889 switch (State) { 10890 case OMPDeclareSimdDeclAttr::BS_Undefined: 10891 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10892 OutputBecomesInput, Fn); 10893 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10894 OutputBecomesInput, Fn); 10895 break; 10896 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10897 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10898 OutputBecomesInput, Fn); 10899 break; 10900 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10901 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10902 OutputBecomesInput, Fn); 10903 break; 10904 } 10905 } 10906 } 10907 } 10908 10909 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10910 llvm::Function *Fn) { 10911 ASTContext &C = CGM.getContext(); 10912 FD = FD->getMostRecentDecl(); 10913 // Map params to their positions in function decl. 10914 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10915 if (isa<CXXMethodDecl>(FD)) 10916 ParamPositions.try_emplace(FD, 0); 10917 unsigned ParamPos = ParamPositions.size(); 10918 for (const ParmVarDecl *P : FD->parameters()) { 10919 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10920 ++ParamPos; 10921 } 10922 while (FD) { 10923 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10924 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10925 // Mark uniform parameters. 10926 for (const Expr *E : Attr->uniforms()) { 10927 E = E->IgnoreParenImpCasts(); 10928 unsigned Pos; 10929 if (isa<CXXThisExpr>(E)) { 10930 Pos = ParamPositions[FD]; 10931 } else { 10932 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10933 ->getCanonicalDecl(); 10934 Pos = ParamPositions[PVD]; 10935 } 10936 ParamAttrs[Pos].Kind = Uniform; 10937 } 10938 // Get alignment info. 
10939 auto NI = Attr->alignments_begin(); 10940 for (const Expr *E : Attr->aligneds()) { 10941 E = E->IgnoreParenImpCasts(); 10942 unsigned Pos; 10943 QualType ParmTy; 10944 if (isa<CXXThisExpr>(E)) { 10945 Pos = ParamPositions[FD]; 10946 ParmTy = E->getType(); 10947 } else { 10948 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10949 ->getCanonicalDecl(); 10950 Pos = ParamPositions[PVD]; 10951 ParmTy = PVD->getType(); 10952 } 10953 ParamAttrs[Pos].Alignment = 10954 (*NI) 10955 ? (*NI)->EvaluateKnownConstInt(C) 10956 : llvm::APSInt::getUnsigned( 10957 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10958 .getQuantity()); 10959 ++NI; 10960 } 10961 // Mark linear parameters. 10962 auto SI = Attr->steps_begin(); 10963 auto MI = Attr->modifiers_begin(); 10964 for (const Expr *E : Attr->linears()) { 10965 E = E->IgnoreParenImpCasts(); 10966 unsigned Pos; 10967 // Rescaling factor needed to compute the linear parameter 10968 // value in the mangled name. 10969 unsigned PtrRescalingFactor = 1; 10970 if (isa<CXXThisExpr>(E)) { 10971 Pos = ParamPositions[FD]; 10972 } else { 10973 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10974 ->getCanonicalDecl(); 10975 Pos = ParamPositions[PVD]; 10976 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10977 PtrRescalingFactor = CGM.getContext() 10978 .getTypeSizeInChars(P->getPointeeType()) 10979 .getQuantity(); 10980 } 10981 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10982 ParamAttr.Kind = Linear; 10983 // Assuming a stride of 1, for `linear` without modifiers. 
10984 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10985 if (*SI) { 10986 Expr::EvalResult Result; 10987 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10988 if (const auto *DRE = 10989 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10990 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10991 ParamAttr.Kind = LinearWithVarStride; 10992 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10993 ParamPositions[StridePVD->getCanonicalDecl()]); 10994 } 10995 } 10996 } else { 10997 ParamAttr.StrideOrArg = Result.Val.getInt(); 10998 } 10999 } 11000 // If we are using a linear clause on a pointer, we need to 11001 // rescale the value of linear_step with the byte size of the 11002 // pointee type. 11003 if (Linear == ParamAttr.Kind) 11004 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11005 ++SI; 11006 ++MI; 11007 } 11008 llvm::APSInt VLENVal; 11009 SourceLocation ExprLoc; 11010 const Expr *VLENExpr = Attr->getSimdlen(); 11011 if (VLENExpr) { 11012 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11013 ExprLoc = VLENExpr->getExprLoc(); 11014 } 11015 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11016 if (CGM.getTriple().isX86()) { 11017 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11018 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11019 unsigned VLEN = VLENVal.getExtValue(); 11020 StringRef MangledName = Fn->getName(); 11021 if (CGM.getTarget().hasFeature("sve")) 11022 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11023 MangledName, 's', 128, Fn, ExprLoc); 11024 if (CGM.getTarget().hasFeature("neon")) 11025 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11026 MangledName, 'n', 128, Fn, ExprLoc); 11027 } 11028 } 11029 FD = FD->getPreviousDecl(); 11030 } 11031 } 11032 11033 namespace { 11034 /// Cleanup action for doacross support. 
11035 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11036 public: 11037 static const int DoacrossFinArgs = 2; 11038 11039 private: 11040 llvm::FunctionCallee RTLFn; 11041 llvm::Value *Args[DoacrossFinArgs]; 11042 11043 public: 11044 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11045 ArrayRef<llvm::Value *> CallArgs) 11046 : RTLFn(RTLFn) { 11047 assert(CallArgs.size() == DoacrossFinArgs); 11048 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11049 } 11050 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11051 if (!CGF.HaveInsertPoint()) 11052 return; 11053 CGF.EmitRuntimeCall(RTLFn, Args); 11054 } 11055 }; 11056 } // namespace 11057 11058 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11059 const OMPLoopDirective &D, 11060 ArrayRef<Expr *> NumIterations) { 11061 if (!CGF.HaveInsertPoint()) 11062 return; 11063 11064 ASTContext &C = CGM.getContext(); 11065 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11066 RecordDecl *RD; 11067 if (KmpDimTy.isNull()) { 11068 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11069 // kmp_int64 lo; // lower 11070 // kmp_int64 up; // upper 11071 // kmp_int64 st; // stride 11072 // }; 11073 RD = C.buildImplicitRecord("kmp_dim"); 11074 RD->startDefinition(); 11075 addFieldToRecordDecl(C, RD, Int64Ty); 11076 addFieldToRecordDecl(C, RD, Int64Ty); 11077 addFieldToRecordDecl(C, RD, Int64Ty); 11078 RD->completeDefinition(); 11079 KmpDimTy = C.getRecordType(RD); 11080 } else { 11081 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11082 } 11083 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11084 QualType ArrayTy = 11085 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11086 11087 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11088 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11089 enum { LowerFD = 0, UpperFD, StrideFD }; 11090 // Fill dims with data. 
11091 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11092 LValue DimsLVal = CGF.MakeAddrLValue( 11093 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11094 // dims.upper = num_iterations; 11095 LValue UpperLVal = CGF.EmitLValueForField( 11096 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11097 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11098 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11099 Int64Ty, NumIterations[I]->getExprLoc()); 11100 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11101 // dims.stride = 1; 11102 LValue StrideLVal = CGF.EmitLValueForField( 11103 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11104 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11105 StrideLVal); 11106 } 11107 11108 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11109 // kmp_int32 num_dims, struct kmp_dim * dims); 11110 llvm::Value *Args[] = { 11111 emitUpdateLocation(CGF, D.getBeginLoc()), 11112 getThreadID(CGF, D.getBeginLoc()), 11113 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11114 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11115 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11116 CGM.VoidPtrTy)}; 11117 11118 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11119 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11120 CGF.EmitRuntimeCall(RTLFn, Args); 11121 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11122 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11123 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11124 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11125 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11126 llvm::makeArrayRef(FiniArgs)); 11127 } 11128 11129 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11130 const OMPDependClause *C) { 11131 QualType Int64Ty = 11132 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11133 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11134 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11135 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11136 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11137 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11138 const Expr *CounterVal = C->getLoopData(I); 11139 assert(CounterVal); 11140 llvm::Value *CntVal = CGF.EmitScalarConversion( 11141 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11142 CounterVal->getExprLoc()); 11143 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11144 /*Volatile=*/false, Int64Ty); 11145 } 11146 llvm::Value *Args[] = { 11147 emitUpdateLocation(CGF, C->getBeginLoc()), 11148 getThreadID(CGF, C->getBeginLoc()), 11149 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11150 llvm::FunctionCallee RTLFn; 11151 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11152 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11153 OMPRTL___kmpc_doacross_post); 11154 } else { 11155 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11156 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11157 OMPRTL___kmpc_doacross_wait); 11158 } 11159 CGF.EmitRuntimeCall(RTLFn, Args); 11160 } 11161 11162 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11163 llvm::FunctionCallee Callee, 11164 ArrayRef<llvm::Value *> Args) const { 11165 assert(Loc.isValid() && "Outlined function call location must be valid."); 11166 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11167 11168 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11169 if (Fn->doesNotThrow()) { 11170 CGF.EmitNounwindRuntimeCall(Fn, Args); 11171 return; 11172 } 11173 } 11174 CGF.EmitRuntimeCall(Callee, Args); 11175 } 11176 11177 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11178 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11179 ArrayRef<llvm::Value *> Args) const { 11180 emitCall(CGF, Loc, OutlinedFn, Args); 11181 } 11182 11183 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11184 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11185 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11186 HasEmittedDeclareTargetRegion = true; 11187 } 11188 11189 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11190 const VarDecl *NativeParam, 11191 const VarDecl *TargetParam) const { 11192 return CGF.GetAddrOfLocalVar(NativeParam); 11193 } 11194 11195 namespace { 11196 /// Cleanup action for allocate support. 11197 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11198 public: 11199 static const int CleanupArgs = 3; 11200 11201 private: 11202 llvm::FunctionCallee RTLFn; 11203 llvm::Value *Args[CleanupArgs]; 11204 11205 public: 11206 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11207 ArrayRef<llvm::Value *> CallArgs) 11208 : RTLFn(RTLFn) { 11209 assert(CallArgs.size() == CleanupArgs && 11210 "Size of arguments does not match."); 11211 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11212 } 11213 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11214 if (!CGF.HaveInsertPoint()) 11215 return; 11216 CGF.EmitRuntimeCall(RTLFn, Args); 11217 } 11218 }; 11219 } // namespace 11220 11221 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11222 const VarDecl *VD) { 11223 if (!VD) 11224 return Address::invalid(); 11225 const VarDecl *CVD = VD->getCanonicalDecl(); 11226 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11227 return Address::invalid(); 11228 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11229 // Use the default allocation. 
11230 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11231 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11232 !AA->getAllocator()) 11233 return Address::invalid(); 11234 llvm::Value *Size; 11235 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11236 if (CVD->getType()->isVariablyModifiedType()) { 11237 Size = CGF.getTypeSize(CVD->getType()); 11238 // Align the size: ((size + align - 1) / align) * align 11239 Size = CGF.Builder.CreateNUWAdd( 11240 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11241 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11242 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11243 } else { 11244 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11245 Size = CGM.getSize(Sz.alignTo(Align)); 11246 } 11247 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11248 assert(AA->getAllocator() && 11249 "Expected allocator expression for non-default allocator."); 11250 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11251 // According to the standard, the original allocator type is a enum (integer). 11252 // Convert to pointer type, if required. 
11253 if (Allocator->getType()->isIntegerTy()) 11254 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11255 else if (Allocator->getType()->isPointerTy()) 11256 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11257 CGM.VoidPtrTy); 11258 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11259 11260 llvm::Value *Addr = 11261 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11262 CGM.getModule(), OMPRTL___kmpc_alloc), 11263 Args, getName({CVD->getName(), ".void.addr"})); 11264 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11265 Allocator}; 11266 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11267 CGM.getModule(), OMPRTL___kmpc_free); 11268 11269 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11270 llvm::makeArrayRef(FiniArgs)); 11271 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11272 Addr, 11273 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11274 getName({CVD->getName(), ".addr"})); 11275 return Address(Addr, Align); 11276 } 11277 11278 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11279 CodeGenModule &CGM, const OMPLoopDirective &S) 11280 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11281 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11282 if (!NeedToPush) 11283 return; 11284 NontemporalDeclsSet &DS = 11285 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11286 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11287 for (const Stmt *Ref : C->private_refs()) { 11288 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11289 const ValueDecl *VD; 11290 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11291 VD = DRE->getDecl(); 11292 } else { 11293 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11294 assert((ME->isImplicitCXXThis() || 11295 
isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11296 "Expected member of current class."); 11297 VD = ME->getMemberDecl(); 11298 } 11299 DS.insert(VD); 11300 } 11301 } 11302 } 11303 11304 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11305 if (!NeedToPush) 11306 return; 11307 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11308 } 11309 11310 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11311 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11312 11313 return llvm::any_of( 11314 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11315 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11316 } 11317 11318 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11319 const OMPExecutableDirective &S, 11320 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11321 const { 11322 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11323 // Vars in target/task regions must be excluded completely. 11324 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11325 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11326 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11327 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11328 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11329 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11330 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11331 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11332 } 11333 } 11334 // Exclude vars in private clauses. 
11335 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11336 for (const Expr *Ref : C->varlists()) { 11337 if (!Ref->getType()->isScalarType()) 11338 continue; 11339 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11340 if (!DRE) 11341 continue; 11342 NeedToCheckForLPCs.insert(DRE->getDecl()); 11343 } 11344 } 11345 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11346 for (const Expr *Ref : C->varlists()) { 11347 if (!Ref->getType()->isScalarType()) 11348 continue; 11349 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11350 if (!DRE) 11351 continue; 11352 NeedToCheckForLPCs.insert(DRE->getDecl()); 11353 } 11354 } 11355 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11356 for (const Expr *Ref : C->varlists()) { 11357 if (!Ref->getType()->isScalarType()) 11358 continue; 11359 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11360 if (!DRE) 11361 continue; 11362 NeedToCheckForLPCs.insert(DRE->getDecl()); 11363 } 11364 } 11365 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11366 for (const Expr *Ref : C->varlists()) { 11367 if (!Ref->getType()->isScalarType()) 11368 continue; 11369 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11370 if (!DRE) 11371 continue; 11372 NeedToCheckForLPCs.insert(DRE->getDecl()); 11373 } 11374 } 11375 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11376 for (const Expr *Ref : C->varlists()) { 11377 if (!Ref->getType()->isScalarType()) 11378 continue; 11379 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11380 if (!DRE) 11381 continue; 11382 NeedToCheckForLPCs.insert(DRE->getDecl()); 11383 } 11384 } 11385 for (const Decl *VD : NeedToCheckForLPCs) { 11386 for (const LastprivateConditionalData &Data : 11387 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11388 if (Data.DeclToUniqueName.count(VD) > 0) { 11389 if (!Data.Disabled) 11390 
NeedToAddForLPCsAsDisabled.insert(VD); 11391 break; 11392 } 11393 } 11394 } 11395 } 11396 11397 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11398 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11399 : CGM(CGF.CGM), 11400 Action((CGM.getLangOpts().OpenMP >= 50 && 11401 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11402 [](const OMPLastprivateClause *C) { 11403 return C->getKind() == 11404 OMPC_LASTPRIVATE_conditional; 11405 })) 11406 ? ActionToDo::PushAsLastprivateConditional 11407 : ActionToDo::DoNotPush) { 11408 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11409 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11410 return; 11411 assert(Action == ActionToDo::PushAsLastprivateConditional && 11412 "Expected a push action."); 11413 LastprivateConditionalData &Data = 11414 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11415 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11416 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11417 continue; 11418 11419 for (const Expr *Ref : C->varlists()) { 11420 Data.DeclToUniqueName.insert(std::make_pair( 11421 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11422 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11423 } 11424 } 11425 Data.IVLVal = IVLVal; 11426 Data.Fn = CGF.CurFn; 11427 } 11428 11429 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11430 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11431 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11432 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11433 if (CGM.getLangOpts().OpenMP < 50) 11434 return; 11435 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11436 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11437 if (!NeedToAddForLPCsAsDisabled.empty()) { 11438 Action = ActionToDo::DisableLastprivateConditional; 11439 
LastprivateConditionalData &Data = 11440 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11441 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11442 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11443 Data.Fn = CGF.CurFn; 11444 Data.Disabled = true; 11445 } 11446 } 11447 11448 CGOpenMPRuntime::LastprivateConditionalRAII 11449 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11450 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11451 return LastprivateConditionalRAII(CGF, S); 11452 } 11453 11454 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11455 if (CGM.getLangOpts().OpenMP < 50) 11456 return; 11457 if (Action == ActionToDo::DisableLastprivateConditional) { 11458 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11459 "Expected list of disabled private vars."); 11460 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11461 } 11462 if (Action == ActionToDo::PushAsLastprivateConditional) { 11463 assert( 11464 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11465 "Expected list of lastprivate conditional vars."); 11466 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11467 } 11468 } 11469 11470 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11471 const VarDecl *VD) { 11472 ASTContext &C = CGM.getContext(); 11473 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11474 if (I == LastprivateConditionalToTypes.end()) 11475 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11476 QualType NewType; 11477 const FieldDecl *VDField; 11478 const FieldDecl *FiredField; 11479 LValue BaseLVal; 11480 auto VI = I->getSecond().find(VD); 11481 if (VI == I->getSecond().end()) { 11482 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11483 RD->startDefinition(); 11484 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11485 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11486 RD->completeDefinition(); 11487 NewType = C.getRecordType(RD); 11488 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11489 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11490 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11491 } else { 11492 NewType = std::get<0>(VI->getSecond()); 11493 VDField = std::get<1>(VI->getSecond()); 11494 FiredField = std::get<2>(VI->getSecond()); 11495 BaseLVal = std::get<3>(VI->getSecond()); 11496 } 11497 LValue FiredLVal = 11498 CGF.EmitLValueForField(BaseLVal, FiredField); 11499 CGF.EmitStoreOfScalar( 11500 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11501 FiredLVal); 11502 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11503 } 11504 11505 namespace { 11506 /// Checks if the lastprivate conditional variable is referenced in LHS. 11507 class LastprivateConditionalRefChecker final 11508 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11509 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11510 const Expr *FoundE = nullptr; 11511 const Decl *FoundD = nullptr; 11512 StringRef UniqueDeclName; 11513 LValue IVLVal; 11514 llvm::Function *FoundFn = nullptr; 11515 SourceLocation Loc; 11516 11517 public: 11518 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11519 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11520 llvm::reverse(LPM)) { 11521 auto It = D.DeclToUniqueName.find(E->getDecl()); 11522 if (It == D.DeclToUniqueName.end()) 11523 continue; 11524 if (D.Disabled) 11525 return false; 11526 FoundE = E; 11527 FoundD = E->getDecl()->getCanonicalDecl(); 11528 UniqueDeclName = It->second; 11529 IVLVal = D.IVLVal; 11530 FoundFn = D.Fn; 11531 break; 11532 } 11533 return FoundE == E; 11534 } 11535 bool VisitMemberExpr(const MemberExpr *E) { 11536 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11537 return false; 11538 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11539 llvm::reverse(LPM)) { 11540 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11541 if (It == D.DeclToUniqueName.end()) 11542 continue; 11543 if (D.Disabled) 11544 return false; 11545 FoundE = E; 11546 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11547 UniqueDeclName = It->second; 11548 IVLVal = D.IVLVal; 11549 FoundFn = D.Fn; 11550 break; 11551 } 11552 return FoundE == E; 11553 } 11554 bool VisitStmt(const Stmt *S) { 11555 for (const Stmt *Child : S->children()) { 11556 if (!Child) 11557 continue; 11558 if (const auto *E = dyn_cast<Expr>(Child)) 11559 if (!E->isGLValue()) 11560 continue; 11561 if (Visit(Child)) 11562 return true; 11563 } 11564 return false; 11565 } 11566 explicit LastprivateConditionalRefChecker( 11567 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11568 : LPM(LPM) {} 11569 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11570 getFoundData() const { 11571 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11572 } 11573 }; 11574 } // namespace 11575 11576 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11577 LValue IVLVal, 11578 StringRef UniqueDeclName, 11579 LValue LVal, 11580 SourceLocation Loc) { 11581 // Last updated loop counter for the lastprivate conditional var. 11582 // int<xx> last_iv = 0; 11583 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11584 llvm::Constant *LastIV = 11585 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11586 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11587 IVLVal.getAlignment().getAsAlign()); 11588 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11589 11590 // Last value of the lastprivate conditional. 
11591 // decltype(priv_a) last_a; 11592 llvm::Constant *Last = getOrCreateInternalVariable( 11593 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11594 cast<llvm::GlobalVariable>(Last)->setAlignment( 11595 LVal.getAlignment().getAsAlign()); 11596 LValue LastLVal = 11597 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11598 11599 // Global loop counter. Required to handle inner parallel-for regions. 11600 // iv 11601 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11602 11603 // #pragma omp critical(a) 11604 // if (last_iv <= iv) { 11605 // last_iv = iv; 11606 // last_a = priv_a; 11607 // } 11608 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11609 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11610 Action.Enter(CGF); 11611 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11612 // (last_iv <= iv) ? Check if the variable is updated and store new 11613 // value in global var. 11614 llvm::Value *CmpRes; 11615 if (IVLVal.getType()->isSignedIntegerType()) { 11616 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11617 } else { 11618 assert(IVLVal.getType()->isUnsignedIntegerType() && 11619 "Loop iteration variable must be integer."); 11620 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11621 } 11622 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11623 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11624 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11625 // { 11626 CGF.EmitBlock(ThenBB); 11627 11628 // last_iv = iv; 11629 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11630 11631 // last_a = priv_a; 11632 switch (CGF.getEvaluationKind(LVal.getType())) { 11633 case TEK_Scalar: { 11634 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11635 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11636 break; 11637 } 11638 case TEK_Complex: { 11639 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11640 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11641 break; 11642 } 11643 case TEK_Aggregate: 11644 llvm_unreachable( 11645 "Aggregates are not supported in lastprivate conditional."); 11646 } 11647 // } 11648 CGF.EmitBranch(ExitBB); 11649 // There is no need to emit line number for unconditional branch. 11650 (void)ApplyDebugLocation::CreateEmpty(CGF); 11651 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11652 }; 11653 11654 if (CGM.getLangOpts().OpenMPSimd) { 11655 // Do not emit as a critical region as no parallel region could be emitted. 11656 RegionCodeGenTy ThenRCG(CodeGen); 11657 ThenRCG(CGF); 11658 } else { 11659 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11660 } 11661 } 11662 11663 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11664 const Expr *LHS) { 11665 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11666 return; 11667 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11668 if (!Checker.Visit(LHS)) 11669 return; 11670 const Expr *FoundE; 11671 const Decl *FoundD; 11672 StringRef UniqueDeclName; 11673 LValue IVLVal; 11674 llvm::Function *FoundFn; 11675 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11676 Checker.getFoundData(); 11677 if (FoundFn != CGF.CurFn) { 11678 // Special codegen for inner parallel regions. 
11679 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11680 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11681 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11682 "Lastprivate conditional is not found in outer region."); 11683 QualType StructTy = std::get<0>(It->getSecond()); 11684 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11685 LValue PrivLVal = CGF.EmitLValue(FoundE); 11686 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11687 PrivLVal.getAddress(CGF), 11688 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11689 LValue BaseLVal = 11690 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11691 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11692 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11693 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11694 FiredLVal, llvm::AtomicOrdering::Unordered, 11695 /*IsVolatile=*/true, /*isInit=*/false); 11696 return; 11697 } 11698 11699 // Private address of the lastprivate conditional in the current context. 
11700 // priv_a 11701 LValue LVal = CGF.EmitLValue(FoundE); 11702 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11703 FoundE->getExprLoc()); 11704 } 11705 11706 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11707 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11708 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11709 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11710 return; 11711 auto Range = llvm::reverse(LastprivateConditionalStack); 11712 auto It = llvm::find_if( 11713 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11714 if (It == Range.end() || It->Fn != CGF.CurFn) 11715 return; 11716 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11717 assert(LPCI != LastprivateConditionalToTypes.end() && 11718 "Lastprivates must be registered already."); 11719 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11720 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11721 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11722 for (const auto &Pair : It->DeclToUniqueName) { 11723 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11724 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11725 continue; 11726 auto I = LPCI->getSecond().find(Pair.first); 11727 assert(I != LPCI->getSecond().end() && 11728 "Lastprivate must be rehistered already."); 11729 // bool Cmp = priv_a.Fired != 0; 11730 LValue BaseLVal = std::get<3>(I->getSecond()); 11731 LValue FiredLVal = 11732 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11733 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11734 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11735 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11736 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11737 // if (Cmp) { 11738 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11739 CGF.EmitBlock(ThenBB); 
11740 Address Addr = CGF.GetAddrOfLocalVar(VD); 11741 LValue LVal; 11742 if (VD->getType()->isReferenceType()) 11743 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11744 AlignmentSource::Decl); 11745 else 11746 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11747 AlignmentSource::Decl); 11748 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11749 D.getBeginLoc()); 11750 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11751 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11752 // } 11753 } 11754 } 11755 11756 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11757 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11758 SourceLocation Loc) { 11759 if (CGF.getLangOpts().OpenMP < 50) 11760 return; 11761 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11762 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11763 "Unknown lastprivate conditional variable."); 11764 StringRef UniqueName = It->second; 11765 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11766 // The variable was not updated in the region - exit. 
11767 if (!GV) 11768 return; 11769 LValue LPLVal = CGF.MakeAddrLValue( 11770 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11771 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11772 CGF.EmitStoreOfScalar(Res, PrivLVal); 11773 } 11774 11775 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11776 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11777 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11778 llvm_unreachable("Not supported in SIMD-only mode"); 11779 } 11780 11781 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11782 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11783 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11784 llvm_unreachable("Not supported in SIMD-only mode"); 11785 } 11786 11787 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11788 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11789 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11790 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11791 bool Tied, unsigned &NumberOfParts) { 11792 llvm_unreachable("Not supported in SIMD-only mode"); 11793 } 11794 11795 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11796 SourceLocation Loc, 11797 llvm::Function *OutlinedFn, 11798 ArrayRef<llvm::Value *> CapturedVars, 11799 const Expr *IfCond) { 11800 llvm_unreachable("Not supported in SIMD-only mode"); 11801 } 11802 11803 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11804 CodeGenFunction &CGF, StringRef CriticalName, 11805 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11806 const Expr *Hint) { 11807 llvm_unreachable("Not supported in SIMD-only mode"); 11808 } 11809 11810 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11811 const RegionCodeGenTy &MasterOpGen, 11812 SourceLocation Loc) { 11813 llvm_unreachable("Not supported in SIMD-only mode"); 11814 } 11815 11816 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11817 SourceLocation Loc) { 11818 llvm_unreachable("Not supported in SIMD-only mode"); 11819 } 11820 11821 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11822 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11823 SourceLocation Loc) { 11824 llvm_unreachable("Not supported in SIMD-only mode"); 11825 } 11826 11827 void CGOpenMPSIMDRuntime::emitSingleRegion( 11828 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11829 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11830 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11831 ArrayRef<const Expr *> AssignmentOps) { 11832 llvm_unreachable("Not supported in SIMD-only mode"); 11833 } 11834 11835 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11836 const RegionCodeGenTy &OrderedOpGen, 11837 SourceLocation Loc, 11838 bool IsThreads) { 11839 llvm_unreachable("Not supported in SIMD-only mode"); 11840 } 11841 11842 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11843 SourceLocation Loc, 11844 OpenMPDirectiveKind Kind, 11845 bool EmitChecks, 11846 bool ForceSimpleCall) { 11847 llvm_unreachable("Not supported in SIMD-only mode"); 11848 } 11849 11850 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11851 CodeGenFunction &CGF, SourceLocation Loc, 11852 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11853 bool Ordered, const DispatchRTInput &DispatchValues) { 11854 llvm_unreachable("Not supported in SIMD-only mode"); 11855 } 11856 11857 void CGOpenMPSIMDRuntime::emitForStaticInit( 11858 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11859 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11860 llvm_unreachable("Not supported in SIMD-only mode"); 11861 } 11862 11863 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11864 CodeGenFunction &CGF, SourceLocation Loc, 11865 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11866 llvm_unreachable("Not supported in SIMD-only mode"); 11867 } 11868 11869 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11870 SourceLocation Loc, 11871 unsigned IVSize, 11872 bool IVSigned) { 11873 llvm_unreachable("Not supported in SIMD-only mode"); 11874 } 11875 11876 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11877 SourceLocation Loc, 11878 OpenMPDirectiveKind DKind) { 11879 llvm_unreachable("Not supported in SIMD-only mode"); 11880 } 11881 11882 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11883 SourceLocation Loc, 11884 unsigned IVSize, bool IVSigned, 11885 Address IL, Address LB, 11886 Address UB, Address ST) { 11887 llvm_unreachable("Not supported in SIMD-only mode"); 11888 } 11889 11890 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11891 llvm::Value *NumThreads, 11892 SourceLocation Loc) { 11893 llvm_unreachable("Not supported in SIMD-only mode"); 11894 } 11895 11896 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11897 ProcBindKind ProcBind, 11898 SourceLocation Loc) { 11899 llvm_unreachable("Not supported in SIMD-only mode"); 11900 } 11901 11902 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11903 const VarDecl *VD, 11904 Address VDAddr, 11905 SourceLocation Loc) { 11906 llvm_unreachable("Not supported in SIMD-only mode"); 11907 } 11908 11909 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11910 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11911 CodeGenFunction *CGF) { 11912 llvm_unreachable("Not supported in SIMD-only mode"); 11913 } 11914 11915 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11916 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11917 llvm_unreachable("Not supported in SIMD-only mode"); 11918 } 11919 11920 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11921 ArrayRef<const Expr *> 
Vars, 11922 SourceLocation Loc, 11923 llvm::AtomicOrdering AO) { 11924 llvm_unreachable("Not supported in SIMD-only mode"); 11925 } 11926 11927 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11928 const OMPExecutableDirective &D, 11929 llvm::Function *TaskFunction, 11930 QualType SharedsTy, Address Shareds, 11931 const Expr *IfCond, 11932 const OMPTaskDataTy &Data) { 11933 llvm_unreachable("Not supported in SIMD-only mode"); 11934 } 11935 11936 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11937 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11938 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11939 const Expr *IfCond, const OMPTaskDataTy &Data) { 11940 llvm_unreachable("Not supported in SIMD-only mode"); 11941 } 11942 11943 void CGOpenMPSIMDRuntime::emitReduction( 11944 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11945 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11946 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11947 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11948 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11949 ReductionOps, Options); 11950 } 11951 11952 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11953 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11954 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11955 llvm_unreachable("Not supported in SIMD-only mode"); 11956 } 11957 11958 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 11959 SourceLocation Loc, 11960 bool IsWorksharingReduction) { 11961 llvm_unreachable("Not supported in SIMD-only mode"); 11962 } 11963 11964 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11965 SourceLocation Loc, 11966 ReductionCodeGen &RCG, 11967 unsigned N) { 11968 llvm_unreachable("Not supported in SIMD-only mode"); 11969 } 11970 
11971 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11972 SourceLocation Loc, 11973 llvm::Value *ReductionsPtr, 11974 LValue SharedLVal) { 11975 llvm_unreachable("Not supported in SIMD-only mode"); 11976 } 11977 11978 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11979 SourceLocation Loc) { 11980 llvm_unreachable("Not supported in SIMD-only mode"); 11981 } 11982 11983 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11984 CodeGenFunction &CGF, SourceLocation Loc, 11985 OpenMPDirectiveKind CancelRegion) { 11986 llvm_unreachable("Not supported in SIMD-only mode"); 11987 } 11988 11989 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11990 SourceLocation Loc, const Expr *IfCond, 11991 OpenMPDirectiveKind CancelRegion) { 11992 llvm_unreachable("Not supported in SIMD-only mode"); 11993 } 11994 11995 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11996 const OMPExecutableDirective &D, StringRef ParentName, 11997 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11998 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11999 llvm_unreachable("Not supported in SIMD-only mode"); 12000 } 12001 12002 void CGOpenMPSIMDRuntime::emitTargetCall( 12003 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12004 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12005 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12006 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12007 const OMPLoopDirective &D)> 12008 SizeEmitter) { 12009 llvm_unreachable("Not supported in SIMD-only mode"); 12010 } 12011 12012 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12013 llvm_unreachable("Not supported in SIMD-only mode"); 12014 } 12015 12016 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12017 llvm_unreachable("Not supported in SIMD-only mode"); 12018 } 12019 12020 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12021 return false; 12022 } 12023 12024 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12025 const OMPExecutableDirective &D, 12026 SourceLocation Loc, 12027 llvm::Function *OutlinedFn, 12028 ArrayRef<llvm::Value *> CapturedVars) { 12029 llvm_unreachable("Not supported in SIMD-only mode"); 12030 } 12031 12032 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12033 const Expr *NumTeams, 12034 const Expr *ThreadLimit, 12035 SourceLocation Loc) { 12036 llvm_unreachable("Not supported in SIMD-only mode"); 12037 } 12038 12039 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12040 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12041 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12042 llvm_unreachable("Not supported in SIMD-only mode"); 12043 } 12044 12045 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12046 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12047 const Expr *Device) { 12048 llvm_unreachable("Not supported in SIMD-only mode"); 12049 } 12050 12051 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12052 const OMPLoopDirective &D, 12053 ArrayRef<Expr *> NumIterations) { 12054 llvm_unreachable("Not supported in SIMD-only mode"); 12055 } 12056 12057 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12058 const OMPDependClause *C) { 12059 llvm_unreachable("Not supported in SIMD-only mode"); 12060 } 12061 12062 const VarDecl * 12063 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12064 const VarDecl *NativeParam) const { 12065 llvm_unreachable("Not supported in SIMD-only mode"); 12066 } 12067 12068 Address 12069 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12070 const VarDecl *NativeParam, 12071 const VarDecl *TargetParam) const { 12072 llvm_unreachable("Not supported in SIMD-only mode"); 12073 } 12074