//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the switch to the next part of an untied task; a no-op in the base
  /// class (only task regions override this).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches any capture info created for an OpenMP
  /// region (subclasses additionally check RegionKind).
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 is the entry point of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id into the part-id variable, run the
        // user-provided codegen, then add a new switch case that resumes
        // execution at the block emitted after the branch to the return block.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per case in the untied
    /// switch). Only valid after Enter() created the switch.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // This info is never identified via classof; instances are used only
  // locally for expression emission.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash lambda/block capture state so the inlined region does not see it;
    // the destructor restores everything.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses so that private and shared sides are accessed
  // through their declared element types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items: element-wise aggregate initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // User-defined reduction with an 'initializer' clause (or no private
    // default initializer to fall back on).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Returns true if the N-th private copy's type requires destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Pushes a destructor cleanup for the N-th private copy, if its type needs
/// one.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereferences pointer/reference levels of \p BaseLV until its type matches
/// \p ElTy, then returns the resulting lvalue re-typed to \p ElTy's memory
/// representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Inverse of loadToBegin: rebuilds a chain of temporaries so that \p Addr
/// (typed \p ElTy) can be handed back to a consumer expecting \p BaseTy; each
/// pointer/reference level gets a memory temporary storing the next level.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
      Address(Addr, BaseLVAlignment);
}

/// Returns the base VarDecl of an array-section or array-subscript reduction
/// expression \p Ref (stripping nested sections/subscripts) and sets \p DE to
/// the DeclRefExpr naming it. Returns nullptr — leaving \p DE untouched — for
/// any other expression form.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// For array-section/subscript reduction items, offsets \p PrivateAddr by the
/// distance between the section start and the base variable's start, so that
/// indexing the private copy mirrors indexing the original; records the base
/// declaration either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) from the section's first element back to the
    // base; applied to the private pointer below.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// True if the N-th item's reduction uses a declare-reduction initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Loads the thread id lvalue through the region's kmp_int32* parameter.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emits the captured statement body for an OpenMP region via the stored
/// RegionCodeGenTy callback.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Run the body inside a terminate scope so an escaping exception cannot
  // leave the structured block.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions receive the thread id by value, so the lvalue is the local
/// variable itself (no pointer load, unlike CGOpenMPRegionInfo's version).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Appends a public, non-bitfield, non-mutable field of type \p FieldTy to
/// record/context \p DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only drop unreferenced declarations; definitions and used globals stay.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Joins \p Parts into a runtime symbol name, prefixing the first part with
/// FirstSeparator and subsequent parts with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emits the outlined combiner (or initializer, when \p IsCombiner is false)
/// function for a declare-reduction construct. \p In and \p Out are the
/// omp_in/omp_out (or omp_orig/omp_priv) variables, privatized to point at the
/// function's two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
           "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Default-initialize omp_priv from its declared initializer first.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits (once per declaration) the combiner and optional initializer
/// functions for \p D and caches them in UDRMap; when emitted inside a
/// function \p CGF, the declaration is also tracked per-function so the cache
/// can be dropped in functionFinished().
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        // Only CallInit-style initializers carry an expression to emit;
        // direct-init is handled via omp_priv's own initializer.
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Returns the cached {combiner, initializer} pair for \p D, emitting it on
/// first use.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outlines the body of a 'parallel' or 'teams' region (captured statement
/// \p CS) into a helper function whose first argument is the kmp_int32*
/// thread-id variable \p ThreadIDVar.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any parallel-flavored directive kind carries a 'cancel'
  // clause; this controls cancellation-exit codegen below.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Outlines the 'parallel' region of directive \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outlines the 'teams' region of directive \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outlines a 'task'/'taskloop' region; for untied tasks, also wires up the
/// part-id based re-entry codegen and reports the number of parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind
        InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, schedule the next part by calling __kmpc_omp_task on
  // the task descriptor again.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Adds the constants in \p Data to \p Fields following the record layout of
/// \p RD, inserting null padding for LLVM struct elements that have no
/// corresponding field.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Creates a global variable of record type \p Ty initialized field-by-field
/// from \p Data; extra arguments are forwarded to finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Builds a constant struct of record type \p Ty from \p Data and appends it
/// to the aggregate being built in \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Creates the per-function "service" insert point (a dummy bitcast named
/// "svcpt") used to anchor lazily emitted thread-id/location code, either at
/// the current insertion point or right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef =
      llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erases the function's service insert point, if one was created.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Formats \p Loc as the ";file;function;line;column;;" string used for
/// ident_t source-location descriptors; \p Buffer provides the storage the
/// returned StringRef points into.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Returns an ident_t* location descriptor for \p Loc (via the
/// OpenMPIRBuilder). Without debug info or with an invalid location, the
/// default source-location string is used.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Returns the kmp_int32 global thread id for the current function, loading
/// it from the region's thread-id parameter when safe, otherwise emitting (and
/// caching) a __kmpc_global_thread_num call at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Returns the __kmpc_for_static_init_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty, // tid
                               CGM.Int32Ty, // schedtype
                               ITy, // lower
                               ITy, // upper
                               ITy, // stride
                               ITy // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1621 llvm::Type *TypeParams[] = { 1622 getIdentTyPointerTy(), // loc 1623 CGM.Int32Ty, // tid 1624 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1625 PtrTy, // p_lower 1626 PtrTy, // p_upper 1627 PtrTy // p_stride 1628 }; 1629 auto *FnTy = 1630 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1631 return CGM.CreateRuntimeFunction(FnTy, Name); 1632 } 1633 1634 /// Obtain information that uniquely identifies a target entry. This 1635 /// consists of the file and device IDs as well as line number associated with 1636 /// the relevant entry source location. 1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1638 unsigned &DeviceID, unsigned &FileID, 1639 unsigned &LineNum) { 1640 SourceManager &SM = C.getSourceManager(); 1641 1642 // The loc should be always valid and have a file ID (the user cannot use 1643 // #pragma directives in macros) 1644 1645 assert(Loc.isValid() && "Source location is expected to be always valid."); 1646 1647 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1648 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1649 1650 llvm::sys::fs::UniqueID ID; 1651 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1652 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1653 << PLoc.getFilename() << EC.message(); 1654 1655 DeviceID = ID.getDevice(); 1656 FileID = ID.getFile(); 1657 LineNum = PLoc.getLine(); 1658 } 1659 1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1661 if (CGM.getLangOpts().OpenMPSimd) 1662 return Address::invalid(); 1663 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1664 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1665 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1666 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1667 HasRequiresUnifiedSharedMemory))) { 1668 SmallString<64> PtrName; 1669 { 1670 
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables need a file ID mixed into the name to
        // keep the reference pointer unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Create the pointer-to-VD global lazily on first request.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address;
      // on the device it is filled in by the offload runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return (creating on first use) the per-variable cache global passed to
/// __kmpc_threadprivate_cached, named "<mangled-name><cache-suffix>".
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Only used when the TLS-based implementation is not in effect.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the current thread's copy of a threadprivate
/// variable. With TLS support the original address is used directly;
/// otherwise the address is obtained from the runtime via
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit the runtime calls that register a threadprivate variable's
/// constructor/copy-constructor/destructor with the OpenMP runtime.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit (once per variable) the ctor/dtor helper functions for a
/// threadprivate definition and register them with the runtime.
/// When no CodeGenFunction is supplied, a standalone
/// "__omp_threadprivate_init_" function is created and returned so it can be
/// run as a global initializer; otherwise the registration is emitted into
/// \p CGF and nullptr is returned. Returns nullptr as well when TLS is used
/// or nothing needs to be emitted.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the helpers twice.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The ctor helper has signature void *(*)(void *): it initializes the
      // storage passed in and returns it.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
      CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the thread-local storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The dtor helper has signature void(*)(void *).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Substitute typed null function pointers for the helpers that were not
    // generated, so the register call always receives all three slots.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No surrounding function: wrap the registration in a standalone
      // initializer function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit the offload-entry ctor/dtor helpers for a declare-target variable
/// definition and register them with the offload entries manager. Returns
/// true exactly when compiling for the device (the caller uses this to
/// suppress the host-side default initialization).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism, not here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the helpers only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Artificial debug location for the generated ctor body.
      auto AL
          = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: emit a dummy private byte whose address serves as the
      // entry ID matching the device-side ctor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy private byte used as the matching entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. Uses real TLS when available; otherwise
/// falls back to a runtime-managed copy via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the global thread-local and use it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else over \p Cond, running \p ThenGen and \p ElseGen in the
/// respective arms. Constant-folds the condition when possible so only the
/// live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit code for a 'parallel' directive call: __kmpc_fork_call with the
/// outlined function when the region runs in parallel, or a serialized
/// fallback (__kmpc_serialized_parallel + direct call +
/// __kmpc_end_serialized_parallel) when an if-clause disables parallelism.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId
    // for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always take the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region, reuse the region's gtid parameter.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: ask the runtime for the id and spill it to a temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return a module-level internal global of type \p Ty with the given name,
/// creating it (common linkage, zero-initialized) on first request and
/// caching it in InternalVars. Asserts if a cached variable of the same name
/// was created with a different type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Flatten the twine into a stable string key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the internal lock variable ("gomp_critical_user_<name>...var")
/// guarding the named critical region.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a construct's body between an "enter" and an "exit" runtime call
/// (e.g. __kmpc_critical / __kmpc_end_critical). When \p Conditional is set,
/// the enter call's result guards the body (used for master/single, where
/// only one thread executes the region); callers must then invoke Done() to
/// close the conditional block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;      // runtime function called on entry
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;       // runtime function called on exit
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                      // body guarded by enter result?
  llvm::BasicBlock *ContBlock = nullptr; // continuation for conditional form

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region guarded by __kmpc_critical[_with_hint] /
/// __kmpc_end_critical on the named region lock.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes an extra 'hint' argument when a hint clause exists.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body runs only on the thread for which
/// __kmpc_master returns nonzero, closed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the guarded enter call.
  Action.Done(CGF);
}

/// Emit a 'taskyield' call, either through the OpenMPIRBuilder or as a
/// direct __kmpc_omp_taskyield runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Untied tasks need a resume point after every possible task switch.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Retype the generic pointer to the variable's actual memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the helper function passed to __kmpc_copyprivate that copies each
/// copyprivate variable from the executing thread's storage (RHS array) into
/// another thread's storage (LHS array), using the provided assignment
/// expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] =
  //     *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region guarded by __kmpc_single/__kmpc_end_single, plus
/// the copyprivate broadcast (__kmpc_copyprivate) when copyprivate clauses
/// are present.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value
      *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the guarded __kmpc_single call.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
2432 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2433 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2434 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2435 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2436 Address CL = 2437 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2438 CGF.VoidPtrTy); 2439 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2440 llvm::Value *Args[] = { 2441 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2442 getThreadID(CGF, Loc), // i32 <gtid> 2443 BufSize, // size_t <buf_size> 2444 CL.getPointer(), // void *<copyprivate list> 2445 CpyFn, // void (*) (void *, void *) <copy_func> 2446 DidItVal // i32 did_it 2447 }; 2448 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2449 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2450 Args); 2451 } 2452 } 2453 2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2455 const RegionCodeGenTy &OrderedOpGen, 2456 SourceLocation Loc, bool IsThreads) { 2457 if (!CGF.HaveInsertPoint()) 2458 return; 2459 // __kmpc_ordered(ident_t *, gtid); 2460 // OrderedOpGen(); 2461 // __kmpc_end_ordered(ident_t *, gtid); 2462 // Prepare arguments and build a call to __kmpc_ordered 2463 if (IsThreads) { 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_ordered), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2470 Args); 2471 OrderedOpGen.setAction(Action); 2472 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2473 return; 2474 } 2475 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2476 } 2477 2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2479 unsigned Flags; 2480 if (Kind == OMPD_for) 2481 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2482 else if (Kind == OMPD_sections) 2483 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

// Pick the default schedule/chunk for a loop directive. Only overrides the
// outputs for doacross loops (ordered(n) with a loop count), which must use
// schedule(static, 1); otherwise ScheduleKind/ChunkExpr are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

// Emit a barrier: a cancellation-aware __kmpc_cancel_barrier when the
// enclosing region has 'cancel' (unless ForceSimpleCall), otherwise a plain
// __kmpc_barrier. With the OpenMPIRBuilder enabled, delegates entirely.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // A nonzero return from __kmpc_cancel_barrier means the region was
        // cancelled; branch out through cleanups:
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

// True iff the schedule clause resolves to plain (non-chunked) static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

// True iff the dist_schedule clause resolves to non-chunked static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

// True iff the schedule clause resolves to chunked static.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

// True iff the dist_schedule clause resolves to chunked static.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

// True for any schedule other than non-chunked static (i.e. schedules that
// require the dynamic dispatch runtime entry points).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

// Combine the schedule enumeration with the monotonic/nonmonotonic/simd
// modifier bits (M1/M2 are the two possible schedule-clause modifiers).
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case
OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    // 'simd' upgrades chunked static to the balanced-chunked variant.
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  // The modifier occupies high bits of the schedule value.
  return Schedule | Modifier;
}

// Emit the __kmpc_dispatch_init_* call that begins a dynamically-scheduled
// (or ordered) worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules go through __kmpc_for_static_init instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

// Shared helper for emitForStaticInit/emitDistributeStaticInit: emits the
// __kmpc_for_static_init_* call for a statically-scheduled loop.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

// Emit static initialization for a worksharing loop or sections directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the ident_t with the worksharing kind (loop vs. sections).
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

// Emit static initialization for a 'distribute' directive; no schedule
// modifiers apply to dist_schedule.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

// Emit the closing __kmpc_for_static_fini call for a static worksharing
// construct (loop, sections, or distribute).
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

// Emit the per-iteration __kmpc_dispatch_fini_* call for an ordered
// dynamically-scheduled loop.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

// Emit __kmpc_dispatch_next_* and convert its i32 result to a bool
// ("another chunk is available").
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

// Emit __kmpc_push_num_threads to set the thread count requested by a
// num_threads clause for the next parallel region.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
/*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

// Emit __kmpc_push_proc_bind to set the proc_bind policy for the next
// parallel region.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

// Emit a 'flush' construct as a __kmpc_flush call (the expression list and
// the requested atomic ordering AO are currently not forwarded to the
// runtime call in this path).
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

// True when no offload entries (target regions or device globals) exist.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Address/ID are filled in later by registerTargetRegionEntryInfo.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

// Register (device: fill in; host: create) the entry for a target region,
// keyed by (device id, file id, parent function name, line).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: a plain target-region entry that already exists (address
    // ignored) is a duplicate and is silently skipped.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

// Walk the nested device/file/parent/line maps; returns false if any level
// is missing (and, unless IgnoreAddressId, if the entry already has an
// address or ID set).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

// Invoke Action on every registered target region entry, passing the full
// (device, file, parent, line) key plus the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

// Register a declare-target global variable entry. On the device the entry
// was pre-initialized and is completed here; on the host a new entry is
// created unless one already exists (in which case only a zero size may be
// updated).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only fill in a previously-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

// Invoke Action on every registered device global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

// Materialize one __tgt_offload_entry descriptor global (name string +
// {addr, name, size, flags, reserved} struct) in the well-known
// "omp_offloading_entries" section that the offload linker scans.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
3104 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3105 return; 3106 3107 llvm::Module &M = CGM.getModule(); 3108 llvm::LLVMContext &C = M.getContext(); 3109 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3110 SourceLocation, StringRef>, 3111 16> 3112 OrderedEntries(OffloadEntriesInfoManager.size()); 3113 llvm::SmallVector<StringRef, 16> ParentFunctions( 3114 OffloadEntriesInfoManager.size()); 3115 3116 // Auxiliary methods to create metadata values and strings. 3117 auto &&GetMDInt = [this](unsigned V) { 3118 return llvm::ConstantAsMetadata::get( 3119 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3120 }; 3121 3122 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3123 3124 // Create the offloading info metadata node. 3125 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3126 3127 // Create function that emits metadata for each target region entry; 3128 auto &&TargetRegionMetadataEmitter = 3129 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3130 &GetMDString]( 3131 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3132 unsigned Line, 3133 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3134 // Generate metadata for target regions. Each entry of this metadata 3135 // contains: 3136 // - Entry 0 -> Kind of this type of metadata (0). 3137 // - Entry 1 -> Device ID of the file where the entry was identified. 3138 // - Entry 2 -> File ID of the file where the entry was identified. 3139 // - Entry 3 -> Mangled name of the function where the entry was 3140 // identified. 3141 // - Entry 4 -> Line in the file where the entry was identified. 3142 // - Entry 5 -> Order the entry was created. 3143 // The first element of the metadata node is the kind. 
3144 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3145 GetMDInt(FileID), GetMDString(ParentName), 3146 GetMDInt(Line), GetMDInt(E.getOrder())}; 3147 3148 SourceLocation Loc; 3149 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3150 E = CGM.getContext().getSourceManager().fileinfo_end(); 3151 I != E; ++I) { 3152 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3153 I->getFirst()->getUniqueID().getFile() == FileID) { 3154 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3155 I->getFirst(), Line, 1); 3156 break; 3157 } 3158 } 3159 // Save this entry in the right position of the ordered entries array. 3160 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3161 ParentFunctions[E.getOrder()] = ParentName; 3162 3163 // Add metadata to the named metadata node. 3164 MD->addOperand(llvm::MDNode::get(C, Ops)); 3165 }; 3166 3167 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3168 TargetRegionMetadataEmitter); 3169 3170 // Create function that emits metadata for each device global variable entry; 3171 auto &&DeviceGlobalVarMetadataEmitter = 3172 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3173 MD](StringRef MangledName, 3174 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3175 &E) { 3176 // Generate metadata for global variables. Each entry of this metadata 3177 // contains: 3178 // - Entry 0 -> Kind of this type of metadata (1). 3179 // - Entry 1 -> Mangled name of the variable. 3180 // - Entry 2 -> Declare target kind. 3181 // - Entry 3 -> Order the entry was created. 3182 // The first element of the metadata node is the kind. 3183 llvm::Metadata *Ops[] = { 3184 GetMDInt(E.getKind()), GetMDString(MangledName), 3185 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3186 3187 // Save this entry in the right position of the ordered entries array. 
3188 OrderedEntries[E.getOrder()] = 3189 std::make_tuple(&E, SourceLocation(), MangledName); 3190 3191 // Add metadata to the named metadata node. 3192 MD->addOperand(llvm::MDNode::get(C, Ops)); 3193 }; 3194 3195 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3196 DeviceGlobalVarMetadataEmitter); 3197 3198 for (const auto &E : OrderedEntries) { 3199 assert(std::get<0>(E) && "All ordered entries must exist!"); 3200 if (const auto *CE = 3201 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3202 std::get<0>(E))) { 3203 if (!CE->getID() || !CE->getAddress()) { 3204 // Do not blame the entry if the parent funtion is not emitted. 3205 StringRef FnName = ParentFunctions[CE->getOrder()]; 3206 if (!CGM.GetGlobalValue(FnName)) 3207 continue; 3208 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3209 DiagnosticsEngine::Error, 3210 "Offloading entry for target region in %0 is incorrect: either the " 3211 "address or the ID is invalid."); 3212 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3213 continue; 3214 } 3215 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3216 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3217 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3218 OffloadEntryInfoDeviceGlobalVar>( 3219 std::get<0>(E))) { 3220 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3221 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3222 CE->getFlags()); 3223 switch (Flags) { 3224 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3225 if (CGM.getLangOpts().OpenMPIsDevice && 3226 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3227 continue; 3228 if (!CE->getAddress()) { 3229 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3230 DiagnosticsEngine::Error, "Offloading entry for declare target " 3231 "variable %0 is incorrect: the " 3232 "address is invalid."); 3233 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3234 
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // On the device the 'link' entry must not carry an address; on the
        // host it must.
        // NOTE(review): the assert message has a typo ("Declaret") — string
        // kept byte-identical here.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For a global variable the address doubles as the entry ID (passed as
      // both the first and second argument).
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load when no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; we only read its named
  // metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; remaining operand layout depends on it and
    // mirrors the emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds the kmp_routine_entry_t function-pointer type:
/// typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    // Field order mirrors the struct layout documented above.
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The runtime expects this record to be packed.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundle describing one privatized variable of a task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor used for untied local variables (no copy/init decls).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // True when only Original is set, i.e. the entry was built with the
  // single-argument constructor.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an OMPAllocateDeclAttr that requests a
/// non-default allocator (i.e. the declaration needs runtime allocation).
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the record holding the privatized copies for a task, or returns
/// null when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate alignment attributes from the original variable to the
      // field so the privatized copy keeps the required alignment.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the kmp_task_t record matching the runtime's task descriptor;
/// taskloop directives get five extra trailing fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Builds the wrapper record pairing the task descriptor with the (optional)
/// privates record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // Only add the privates field when there is at least one private variable.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature required by the runtime: (kmp_int32 gtid, kmp_task_t *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1 (the privates record) exists only when the task has privates;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb/ub/st/liter/reductions loaded from the
  // task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits the .omp_task_destructor. helper that runs destructors for every
/// field of the privates record that needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field whose type needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: const restrict pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to its 1-based position in Args; the same
  // counter runs across all four clause lists below.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // Same pointer adjustments as in createPrivatesRecordDecl.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // When optimizing, force-inline this trivial mapping helper.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Walk the privates record fields in order; Privates[Counter] is the
  // variable stored in field number Counter.
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Reuse FI to iterate the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial construction needs to run again;
    // otherwise emit any initializer the private copy has.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function read the shared value out of the source task.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record case: bind the init element to the shared value and
          // run the initializer on the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  // Required as soon as any private copy has a non-trivial constructor call
  // as its initializer.
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: (kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied out of the *source* task's shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
/// NOTE(review): the \p KmpTaskTWithPrivatesQTyRD parameter is unused in the
/// current implementation.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One cont/exit jump destination per iterator; closed in reverse order by
  // the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // The constructor opens one nested counter loop per iterator; the
  // destructor emits the matching increment/branch/exit blocks.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned compare depends on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the address of the data referenced by \p E together with the size
/// in bytes it covers (handles array-shaping and array-section expressions).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = element size * product of all shaping dimensions.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Array section: size = (one-past-upper-bound address) - lower address.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
4113 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4114 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4115 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4116 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4117 } else { 4118 SizeVal = CGF.getTypeSize(Ty); 4119 } 4120 return std::make_pair(Addr, SizeVal); 4121 } 4122 4123 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4124 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4125 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4126 if (KmpTaskAffinityInfoTy.isNull()) { 4127 RecordDecl *KmpAffinityInfoRD = 4128 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4129 KmpAffinityInfoRD->startDefinition(); 4130 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4131 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4132 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4133 KmpAffinityInfoRD->completeDefinition(); 4134 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4135 } 4136 } 4137 4138 CGOpenMPRuntime::TaskResultTy 4139 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4140 const OMPExecutableDirective &D, 4141 llvm::Function *TaskFunction, QualType SharedsTy, 4142 Address Shareds, const OMPTaskDataTy &Data) { 4143 ASTContext &C = CGM.getContext(); 4144 llvm::SmallVector<PrivateDataTy, 4> Privates; 4145 // Aggregate privates and sort them by the alignment. 
  // Collect private, firstprivate and lastprivate copies; each entry pairs
  // the declared alignment with the helper data used to initialize it.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    // Firstprivates additionally carry the element-init helper variable.
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored as pointers, so use pointer alignment.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by descending alignment so the most-aligned privates come first in
  // the record; stable to keep declaration order among equal alignments.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function is passed as the 4th argument of TaskFunction.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Ask the runtime to run destructors only when some private actually
    // needs non-trivial destruction.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final clause: a pointer means the condition is dynamic (select at
  // runtime); otherwise the compile-time boolean decides the flag.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ?
            CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // nowait target tasks use the device-aware allocation entry point.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Store the returned event handle into the user's event variable.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime count
    // (NumOfElements); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = compile-time count + iterator count,
      // emitted as a VLA.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static case: a constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      // Iterator-modified clauses are handled in the second pass below.
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // The iterator pass needs a runtime position counter, seeded with the
      // number of statically filled entries.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope emits the iterator loop nest; the stores below run once per
      // iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++Pos (stored back for the next iteration / clause).
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that duplicate tasks need a task-dup routine when there are
    // lastprivates or privates requiring (re)initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // Data1/Data2 are field indices into kmp_task_t (declared elsewhere in this
  // file); both fields have the kmp_cmplrdata_t union type.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // source/sink/depobj/unknown never reach this translation.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The record layout is { intptr_t base_addr; size_t len; <flags> flags; }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns (number of elements, base lvalue) for the dependency array stored
/// in a depobj variable. The element count is read from the base_addr field
/// of the sentinel element placed at index -1 of the array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a pointer to the first real kmp_depend_info
  // element.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the size-carrying sentinel.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info entries for all expressions in \p Data starting at
/// position \p Pos. \p Pos is either a compile-time counter (unsigned*) for
/// statically indexed entries or a runtime counter lvalue (LValue*) for
/// iterator-expanded entries; it is advanced past every written element.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If there is an iterator modifier, the stores below are emitted inside the
  // generated iterator loop nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (statically or at runtime).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Returns, per depobj expression in \p Data, a runtime value holding the
/// number of kmp_depend_info elements stored in that depobj (read from the
/// sentinel element at index -1).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Sizes are accumulated into memory temps so they survive the iterator
    // scope; they are re-loaded after the scope is closed.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal =
          CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated sizes now that the iterator scope is closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the kmp_depend_info arrays referenced by the depobj expressions in
/// \p Data into \p DependenciesArray, starting at the runtime position
/// \p PosLVal, which is advanced by the number of elements copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

/// Builds the combined kmp_depend_info array for a set of depend clauses and
/// returns (number of elements, array address). Returns (nullptr, invalid)
/// when every clause's expression list is empty.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: plain (non-depobj, non-iterator) dependencies only.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
                               0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj contributes the runtime-known count of its elements.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: static count + depobj count + iterator count,
    // emitted as a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a constant array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies at statically known positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 2: copy regular dependencies with iterators, tracked by a runtime
  // position counter seeded with the static count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 3: copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Run-time count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the extra front record that holds the element count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the allocation as a constant array with one extra
    // leading element for the count record.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position starts at 1, skipping the count record. With iterators the
  // position must live in memory (run-time increments); otherwise a plain
  // unsigned is enough.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first dependency, i.e. one element past the
  // count record, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emits the 'destroy' handling for a depobj: frees the heap-allocated
/// kmp_depend_info array via __kmpc_free, accounting for the extra count
/// record in front of the stored pointer.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // The allocation actually starts one record earlier (the count slot), so
  // step back before freeing.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emits the 'update' handling for a depobj: rewrites the flags field of
/// every stored kmp_depend_info element to the new dependency kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Loop over the elements with a pointer PHI: entry edge starts at Begin,
  // back edge comes from the advanced pointer below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits a '#pragma omp task' call: allocates/initializes the task via
/// emitTaskInit, emits its dependency list, then issues either
/// __kmpc_omp_task(_with_deps) or, under a false 'if' clause, the serialized
/// __kmpc_omp_task_begin_if0 / proxy call / __kmpc_omp_task_complete_if0
/// sequence.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependency list is emitted: ndeps_noalias = 0, list = null.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0 each time they are scheduled.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Two pointer PHIs walk the source and destination arrays in lock-step.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: privatize LHS/RHS to the current elements so RedOpGen
  // operates on one element pair per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // UDR: bind the opaque callee to the user-defined combiner
          // function before emitting the call expression.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined 'void reduction_func(void *lhs[<n>], void *rhs[<n>])'
/// helper that applies every reduction combiner element-wise; passed to
/// __kmpc_reduce{_nowait} by emitReduction.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to the corresponding slot of the argument
  // arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. VLA privates occupy an extra slot
      // in the argument array holding the element count (see emitReduction).
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits a single reduction combiner, dispatching to the element-wise
/// aggregate loop for array-typed privates and to the plain combiner
/// otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Single-thread case: apply each combiner directly, no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. For VLA privates the element count travels in the
      // following slot, smuggled through the void* as an integer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
5690 RHSExpr = ACO->getCond(); 5691 } 5692 if (const auto *BORHS = 5693 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5694 EExpr = BORHS->getRHS(); 5695 BO = BORHS->getOpcode(); 5696 } 5697 } 5698 if (XExpr) { 5699 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5700 auto &&AtomicRedGen = [BO, VD, 5701 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5702 const Expr *EExpr, const Expr *UpExpr) { 5703 LValue X = CGF.EmitLValue(XExpr); 5704 RValue E; 5705 if (EExpr) 5706 E = CGF.EmitAnyExpr(EExpr); 5707 CGF.EmitOMPAtomicSimpleUpdateExpr( 5708 X, E, BO, /*IsXLHSInRHSPart=*/true, 5709 llvm::AtomicOrdering::Monotonic, Loc, 5710 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5711 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5712 PrivateScope.addPrivate( 5713 VD, [&CGF, VD, XRValue, Loc]() { 5714 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5715 CGF.emitOMPSimpleStore( 5716 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5717 VD->getType().getNonReferenceType(), Loc); 5718 return LHSTemp; 5719 }); 5720 (void)PrivateScope.Privatize(); 5721 return CGF.EmitAnyExpr(UpExpr); 5722 }); 5723 }; 5724 if ((*IPriv)->getType()->isArrayType()) { 5725 // Emit atomic reduction for array section. 5726 const auto *RHSVar = 5727 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5728 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5729 AtomicRedGen, XExpr, EExpr, UpExpr); 5730 } else { 5731 // Emit atomic reduction for array subscript or single variable. 5732 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5733 } 5734 } else { 5735 // Emit as a critical region. 
5736 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5737 const Expr *, const Expr *) { 5738 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5739 std::string Name = RT.getName({"atomic_reduction"}); 5740 RT.emitCriticalRegion( 5741 CGF, Name, 5742 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5743 Action.Enter(CGF); 5744 emitReductionCombiner(CGF, E); 5745 }, 5746 Loc); 5747 }; 5748 if ((*IPriv)->getType()->isArrayType()) { 5749 const auto *LHSVar = 5750 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5751 const auto *RHSVar = 5752 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5753 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5754 CritRedGen); 5755 } else { 5756 CritRedGen(CGF, nullptr, nullptr, nullptr); 5757 } 5758 } 5759 ++ILHS; 5760 ++IRHS; 5761 ++IPriv; 5762 } 5763 }; 5764 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5765 if (!WithNowait) { 5766 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5767 llvm::Value *EndArgs[] = { 5768 IdentTLoc, // ident_t *<loc> 5769 ThreadId, // i32 <gtid> 5770 Lock // kmp_critical_name *&<lock> 5771 }; 5772 CommonActionTy Action(nullptr, llvm::None, 5773 OMPBuilder.getOrCreateRuntimeFunction( 5774 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5775 EndArgs); 5776 AtomicRCG.setAction(Action); 5777 AtomicRCG(CGF); 5778 } else { 5779 AtomicRCG(CGF); 5780 } 5781 5782 CGF.EmitBranch(DefaultBB); 5783 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5784 } 5785 5786 /// Generates unique name for artificial threadprivate variables. 5787 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  // Prefer the base declaration of the reference expression; fall back to the
  // directly referenced declaration if no base can be extracted.
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/parameters are identified by their plain name, globals by their
  // mangled name; the raw begin location makes the result unique per decl.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both runtime-visible parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer: pass a null original-item pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed if the reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits a kmp_taskred_input_t descriptor (shared item, original item, size,
/// init/fini/comb helper functions, flags) for every task reduction item in
/// \p Data and registers the array with the runtime via __kmpc_taskred_init
/// (or __kmpc_taskred_modifier_init when a reduction task modifier is used).
/// Returns the taskgroup descriptor produced by the runtime, or nullptr if
/// there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks items with runtime-only known size (lazy flag).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Finalizes a task reduction that uses the 'task' modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Publishes the runtime-computed size of a variably-sized reduction item in
/// the artificial "reduction_size" threadprivate variable, so the generated
/// init/comb/fini helper functions can read it back.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the thread-specific instance of a task reduction
/// item, as provided by the runtime for the given taskgroup descriptor.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits code for the 'taskwait' directive, either through the OpenMPIRBuilder
/// or via a direct __kmpc_omp_taskwait runtime call.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of a directive that does not require outlining, inside a
/// temporary inlined OpenMP region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds understood by the __kmpc_cancel* runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the construct named in a 'cancel'/'cancellation point' directive to
/// the matching runtime cancellation kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a __kmpc_cancellationpoint call plus the conditional branch that
/// leaves the construct when the runtime reports a pending cancellation.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a __kmpc_cancel call (optionally guarded by an 'if' clause) plus the
/// conditional branch that leaves the construct when cancellation is granted.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator, allocator-traits) pairs from the uses_allocators clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // Initialize every user-defined allocator on region entry.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // Destroy every user-defined allocator on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emits the outlined function for a 'target' directive, wrapping the body
/// codegen in init/fini actions for any uses_allocators clauses.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE(review): this local 'D' shadows the directive parameter 'D'.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits need no runtime initialization.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emits a __kmpc_init_allocator call for one uses_allocators item and stores
/// the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The traits expression is a constant-size array; its element count is the
  // number of traits passed to the runtime.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emits a __kmpc_destroy_allocator call for one uses_allocators item, using
/// the handle previously stored in the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Generates the outlined function for a target region and, when it is an
/// offload entry, its region ID and offload-entry registration.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
6512 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6513 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6514 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6515 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6516 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6517 isa<UsingDirectiveDecl>(D) || 6518 isa<OMPDeclareReductionDecl>(D) || 6519 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6520 return true; 6521 const auto *VD = dyn_cast<VarDecl>(D); 6522 if (!VD) 6523 return false; 6524 return VD->isConstexpr() || 6525 ((VD->getType().isTrivialType(Ctx) || 6526 VD->getType()->isReferenceType()) && 6527 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6528 })) 6529 continue; 6530 } 6531 // Found multiple children - cannot get the one child only. 6532 if (Child) 6533 return nullptr; 6534 Child = S; 6535 } 6536 if (Child) 6537 Child = Child->IgnoreContainers(); 6538 } 6539 return Child; 6540 } 6541 6542 /// Emit the number of teams for a target directive. Inspect the num_teams 6543 /// clause associated with a teams construct combined or closely nested 6544 /// with the target directive. 6545 /// 6546 /// Emit a team of size one for directives such as 'target parallel' that 6547 /// have no associated teams construct. 6548 /// 6549 /// Otherwise, return nullptr. 
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look through the captured body for a closely
    // nested teams/parallel/simd directive to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: let the runtime choose (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Could not determine a single nested directive statically.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and must not reach
  // this function (guarded by the assert above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads implied by a parallel (or simd) directive
/// closely nested in the captured statement \p CS, clamped by
/// \p DefaultThreadLimitVal when that is non-null. Returns
/// \p DefaultThreadLimitVal (possibly null) if no such directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause with no modifier or the 'parallel' modifier
        // applies to the nested parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel -> 1 thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the default thread limit, if provided:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No single nested directive found: fall back to the default (or 0,
  // meaning "runtime decides").
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.
/// Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look through nested teams/distribute/parallel
    // directives for thread_limit/num_threads information.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause captured.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a nested non-distribute teams directive to look for an
      // inner distribute/parallel directive.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: serialized parallel -> 1 thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and must not reach
  // this function (guarded by the assert above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    // Count trailing zero bits of the OMP_MAP_MEMBER_OF mask.
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, and
  /// user-defined mappers.
  struct MapCombinedInfoTy {
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the size in bytes of the object denoted by \a E as a size_t
  /// llvm::Value, handling array shaping expressions and OpenMP array
  /// sections (including implicit length/lower-bound forms).
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero if lb is past the end of the base object.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    // 'present' may come either from a map clause modifier or from a motion
    // (to/from) clause modifier; both set the same runtime bit.
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
        != MapModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
        != MotionModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)   (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the lower-bound (section) address of the current component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            // Continue right past the overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk: from the end of the last overlapped
          // element up to (and including) the end of the base element.
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData,
                             IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // Constant firstprivates captured by reference: always copy to device.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not firstprivate: default copy-in/copy-out mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag encoding the (0-based) \p Position of the parent
  /// entry; the field stores Position + 1 shifted into the MEMBER_OF bits.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with \p MemberOfFlag, but
  /// only for entries that are eligible (see below).
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the record \p RD (and, recursively, its non-empty bases) into
  /// \p Layout as an ordered list of field declarations, following the LLVM
  /// struct layout computed by CodeGen. \p AsBase selects the base-subobject
  /// LLVM type rather than the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Each LLVM struct slot holds either a base class or a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout in slot order, recursing into bases.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for map/to/from clause handling on an executable directive;
  /// caches firstprivate and device-pointer information from the clauses.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Allocator traits (when present) and non-predefined allocators are
        // treated as implicit firstprivates.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         bool NotTargetParams = false) const {
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    // The difference is never negative (highest >= lowest), hence unsigned.
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  /// Generate all the base pointers, section pointers, sizes, map types and
  /// mappers for the map/to/from/use_device_ptr/use_device_addr clauses of the
  /// current executable directive.
  ///
  /// \param CombinedInfo output arrays (parallel: one index per map entry).
  /// \param NotTargetParams when true, entries are not flagged TARGET_PARAM.
  /// \param SkipVarSet declarations whose map entries must be omitted
  ///        (callers use this e.g. for variables handled elsewhere).
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Component lists rooted at a MemberExpr on 'this' are recorded
    // under the null declaration.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // map clauses keep their own map type; to/from motion clauses are modeled
    // as 'to'/'from' maps with motion modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Standalone pointer: emit a RETURN_PARAM entry with zero size so
          // the runtime hands back the corresponding device pointer.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once, even if listed repeatedly.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // use_device_addr wants the address of the list item, not a loaded
          // pointer value, hence EmitLValue for glvalues.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = !NotTargetParams;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                          NotTargetParams);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Fill the information map for map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        const ValueDecl *VD =
            std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                           : nullptr;
        // Get the corresponding user-defined mapper.
        Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                              MC->getMapTypeModifiers(), llvm::None,
                              /*ReturnDevicePointer=*/false, MC->isImplicit(),
                              std::get<2>(L));
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// Emit capture info for lambdas for variables captured by reference:
  /// one PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry for the captured 'this'
  /// (if any) and one per by-reference (or pointer) capture. The mapping from
  /// each capture field address to the lambda's own address is recorded in
  /// \a LambdaPointers so MEMBER_OF can be fixed up later.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured variable is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the pointer value itself with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures: rewrite the MEMBER_OF field of
  /// every implicit lambda-capture entry (identified by the exact flag
  /// combination emitted by generateInfoForLambdaCaptures) to point at the
  /// entry of its enclosing lambda object.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Search backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every component list from map clauses that refers to this
    // captured declaration, together with its map type/modifiers/mapper.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper);
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two lists overlap when one is a prefix of the other when compared from
    // the base component outwards; the shorter list is the base of the pair.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped lists by declaration order of the first field in
      // which they diverge (or by length when one is a prefix of the other).
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields live in different (base) records; fall back to the
            // flattened record layout to decide which comes first.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: pass the pointee
          // address, not the address of the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // The base-pointer, pointer, and mapper arrays always need per-entry
    // stores, so they are emitted as stack temporaries of void*[N].
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at run time: emit a mutable stack
      // array that the per-entry loop below fills in.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array (a private unnamed_addr global, shared by identical
      // constants module-wide).
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : CombinedInfo.Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only materialize a second global when clearing PRESENT actually
      // changed something; otherwise the begin array is reused for the end.
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the runtime-populated arrays: one slot per captured entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the base pointer's own type so the store below is
      // well-typed (slots are declared as void*).
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr capture's address was stored so the
      // body can later read back the translated device pointer.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        // Sizes may come in narrower types; widen to the i64 the runtime
        // interface expects.
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // When true, emit the map-type array meant for the end-of-region runtime
  // call (see Info.MapTypesArrayEnd) instead of the begin-of-region one.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
    CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element (&array[0][0]).
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end call, prefer the PRESENT-cleared map-type array if one was
    // generated; otherwise the begin array serves both calls.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization.
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing is mapped: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
/// For 'target' the search descends one extra level through a nested 'teams';
/// for 'target teams' only the directly nested directive is inspected. All
/// other kinds either cannot enclose a distribute region here or are not
/// expected as the outer directive at all.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper: 'target' { 'teams' { 'distribute...' } }.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Mapper functions are emitted at most once per declaration; UDMMap caches
  // the generated function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper id so
  // distinct mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the loop body is emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Pre-shift into the MEMBER_OF bit-field position so it can be added to the
  // map type directly below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decay outcomes; the ToElseBB edge carries the unmodified
    // tofrom map type.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Remember which mappers were requested while emitting \p CGF's function.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
/// If \a IsInit is true, and \a MapType indicates to not delete this array,
/// array initialization code is generated. If \a IsInit is false, and \a
/// MapType indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section (more than one element).
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. For the init path the
  // body runs when DELETE is absent; for the delete path, when it is present.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function for \p D, emitting it first if it has not been
/// generated yet (UDMMap caches emitted mappers).
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // Tripcount is only pushed for 'teams distribute'-style loops; a plain
  // distribute without an enclosing teams region is handled by looking for a
  // nested distribute directive.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the offloading sequence for a 'target' construct: capture the kernel
/// arguments, fill up the offloading arrays, call the appropriate
/// __tgt_target*_mapper runtime entry point, and fall back to the
/// host-outlined function when offloading fails or is not requested.
/// \param OutlinedFn Host version of the target region.
/// \param OutlinedFnID Unique ID used by the runtime to identify the region
///        (may be null when no target triples were provided).
/// \param IfCond Condition of an 'if' clause, if any.
/// \param Device Expression and modifier of a 'device' clause, if any.
/// \param SizeEmitter Callback that emits the loop trip count for loop-based
///        target directives.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the target region to be wrapped in an
  // outer task, which changes how captured variables are (re)generated below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  // NOTE: ThenGen consumes InputInfo/MapTypesArray; both are populated by
  // TargetThenGen (further below) before it invokes ThenGen.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the __tgt_target* call means the offload
    // did not happen, so branch to the host fallback.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // TargetThenGen builds the base-pointer/pointer/size/map-type/mapper arrays
  // for every capture, stores them into InputInfo/MapTypesArray, and then runs
  // ThenGen (directly or wrapped in a task when RequiresOuterTask).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, fields of the captured record, and the already-generated
    // capture values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // 'this' captures are recorded as a null key in MappedVarSet.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for target regions and emit their device functions,
/// registering them as offload entry points under \p ParentName.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region across translation units.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific target
    // directive kind; all non-target kinds are unreachable here because of
    // the isOpenMPTargetExecutionDirective check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Decide whether the OpenMP runtime handles the emission of \p GD.
/// \return true when normal codegen for \p GD must be skipped (handled or
/// intentionally dropped here), false to let the regular pipeline emit it.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  // Device compilation from here on.
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target and was
  // not already emitted as part of a target region.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Decide whether the OpenMP runtime handles the emission of the global
/// variable \p GD during device compilation; also scans its ctors/dtors for
/// nested target regions.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
9918 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9919 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9920 cast<VarDecl>(GD.getDecl())); 9921 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9922 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9923 HasRequiresUnifiedSharedMemory)) { 9924 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9925 return true; 9926 } 9927 return false; 9928 } 9929 9930 llvm::Constant * 9931 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9932 const VarDecl *VD) { 9933 assert(VD->getType().isConstant(CGM.getContext()) && 9934 "Expected constant variable."); 9935 StringRef VarName; 9936 llvm::Constant *Addr; 9937 llvm::GlobalValue::LinkageTypes Linkage; 9938 QualType Ty = VD->getType(); 9939 SmallString<128> Buffer; 9940 { 9941 unsigned DeviceID; 9942 unsigned FileID; 9943 unsigned Line; 9944 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9945 FileID, Line); 9946 llvm::raw_svector_ostream OS(Buffer); 9947 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9948 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9949 VarName = OS.str(); 9950 } 9951 Linkage = llvm::GlobalValue::InternalLinkage; 9952 Addr = 9953 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9954 getDefaultFirstprivateAddressSpace()); 9955 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9956 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9957 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9958 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9959 VarName, Addr, VarSize, 9960 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9961 return Addr; 9962 } 9963 9964 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9965 llvm::Constant *Addr) { 9966 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9967 !CGM.getLangOpts().OpenMPIsDevice) 9968 return; 9969 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9970 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9971 if (!Res) { 9972 if (CGM.getLangOpts().OpenMPIsDevice) { 9973 // Register non-target variables being emitted in device code (debug info 9974 // may cause this). 9975 StringRef VarName = CGM.getMangledName(VD); 9976 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9977 } 9978 return; 9979 } 9980 // Register declare target variables. 9981 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9982 StringRef VarName; 9983 CharUnits VarSize; 9984 llvm::GlobalValue::LinkageTypes Linkage; 9985 9986 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9987 !HasRequiresUnifiedSharedMemory) { 9988 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9989 VarName = CGM.getMangledName(VD); 9990 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9991 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9992 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9993 } else { 9994 VarSize = CharUnits::Zero(); 9995 } 9996 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9997 // Temp solution to prevent optimizations of the internal variables. 
9998 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9999 std::string RefName = getName({VarName, "ref"}); 10000 if (!CGM.GetGlobalValue(RefName)) { 10001 llvm::Constant *AddrRef = 10002 getOrCreateInternalVariable(Addr->getType(), RefName); 10003 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10004 GVAddrRef->setConstant(/*Val=*/true); 10005 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10006 GVAddrRef->setInitializer(Addr); 10007 CGM.addCompilerUsedGlobal(GVAddrRef); 10008 } 10009 } 10010 } else { 10011 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10012 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10013 HasRequiresUnifiedSharedMemory)) && 10014 "Declare target attribute must link or to with unified memory."); 10015 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10016 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10017 else 10018 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10019 10020 if (CGM.getLangOpts().OpenMPIsDevice) { 10021 VarName = Addr->getName(); 10022 Addr = nullptr; 10023 } else { 10024 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10025 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10026 } 10027 VarSize = CGM.getPointerSize(); 10028 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10029 } 10030 10031 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10032 VarName, Addr, VarSize, Flags, Linkage); 10033 } 10034 10035 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10036 if (isa<FunctionDecl>(GD.getDecl()) || 10037 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10038 return emitTargetFunctions(GD); 10039 10040 return emitTargetGlobalVariable(GD); 10041 } 10042 10043 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10044 for (const VarDecl *VD : DeferredGlobalVariables) { 10045 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10046 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10047 if (!Res) 
10048 continue; 10049 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10050 !HasRequiresUnifiedSharedMemory) { 10051 CGM.EmitGlobal(VD); 10052 } else { 10053 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10054 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10055 HasRequiresUnifiedSharedMemory)) && 10056 "Expected link clause or to clause with unified memory."); 10057 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10058 } 10059 } 10060 } 10061 10062 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10063 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10064 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10065 " Expected target-based directive."); 10066 } 10067 10068 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10069 for (const OMPClause *Clause : D->clauselists()) { 10070 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10071 HasRequiresUnifiedSharedMemory = true; 10072 } else if (const auto *AC = 10073 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10074 switch (AC->getAtomicDefaultMemOrderKind()) { 10075 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10076 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10077 break; 10078 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10079 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10080 break; 10081 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10082 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10083 break; 10084 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10085 break; 10086 } 10087 } 10088 } 10089 } 10090 10091 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10092 return RequiresAtomicOrdering; 10093 } 10094 10095 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10096 LangAS &AS) { 10097 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10098 return false; 10099 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10100 
switch(A->getAllocatorType()) { 10101 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10102 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10103 // Not supported, fallback to the default mem space. 10104 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10105 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10106 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10107 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10108 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10109 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10110 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10111 AS = LangAS::Default; 10112 return true; 10113 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10114 llvm_unreachable("Expected predefined allocator for the variables with the " 10115 "static storage."); 10116 } 10117 return false; 10118 } 10119 10120 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10121 return HasRequiresUnifiedSharedMemory; 10122 } 10123 10124 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10125 CodeGenModule &CGM) 10126 : CGM(CGM) { 10127 if (CGM.getLangOpts().OpenMPIsDevice) { 10128 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10129 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10130 } 10131 } 10132 10133 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10134 if (CGM.getLangOpts().OpenMPIsDevice) 10135 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10136 } 10137 10138 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10139 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10140 return true; 10141 10142 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10143 // Do not to emit function if it is marked as declare target as it was already 10144 // emitted. 
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A matching global already emitted as a definition means we are done.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Not declare target: skip if it was already recorded, record it otherwise.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the global constructor-like function that registers this TU's
/// 'requires' flags with the offload runtime via __tgt_register_requires.
/// \return the registration function, or nullptr when no registration is
/// needed (device compilation, simd-only mode, or no target regions/entries).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit the call that launches the outlined 'teams' region through
/// __kmpc_fork_teams, forwarding the captured variables.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit a __kmpc_push_num_teams call for 'num_teams'/'thread_limit' clauses.
/// Absent clause expressions default to a 32-bit zero.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ?
            CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the paired __tgt_target_data_begin_mapper/__tgt_target_data_end_mapper
/// calls that open and close a 'target data' environment around \p CodeGen,
/// honoring 'if' and 'device' clauses and duplicating the body when device
/// pointer privatization is required.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit a standalone data-movement directive (target enter data, target exit
/// data, or target update) as a single mapper runtime call.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
10412 llvm::Constant *PointerNum = 10413 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10414 10415 llvm::Value *OffloadingArgs[] = {DeviceID, 10416 PointerNum, 10417 InputInfo.BasePointersArray.getPointer(), 10418 InputInfo.PointersArray.getPointer(), 10419 InputInfo.SizesArray.getPointer(), 10420 MapTypesArray, 10421 InputInfo.MappersArray.getPointer()}; 10422 10423 // Select the right runtime function call for each standalone 10424 // directive. 10425 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10426 RuntimeFunction RTLFn; 10427 switch (D.getDirectiveKind()) { 10428 case OMPD_target_enter_data: 10429 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10430 : OMPRTL___tgt_target_data_begin_mapper; 10431 break; 10432 case OMPD_target_exit_data: 10433 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10434 : OMPRTL___tgt_target_data_end_mapper; 10435 break; 10436 case OMPD_target_update: 10437 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10438 : OMPRTL___tgt_target_data_update_mapper; 10439 break; 10440 case OMPD_parallel: 10441 case OMPD_for: 10442 case OMPD_parallel_for: 10443 case OMPD_parallel_master: 10444 case OMPD_parallel_sections: 10445 case OMPD_for_simd: 10446 case OMPD_parallel_for_simd: 10447 case OMPD_cancel: 10448 case OMPD_cancellation_point: 10449 case OMPD_ordered: 10450 case OMPD_threadprivate: 10451 case OMPD_allocate: 10452 case OMPD_task: 10453 case OMPD_simd: 10454 case OMPD_sections: 10455 case OMPD_section: 10456 case OMPD_single: 10457 case OMPD_master: 10458 case OMPD_critical: 10459 case OMPD_taskyield: 10460 case OMPD_barrier: 10461 case OMPD_taskwait: 10462 case OMPD_taskgroup: 10463 case OMPD_atomic: 10464 case OMPD_flush: 10465 case OMPD_depobj: 10466 case OMPD_scan: 10467 case OMPD_teams: 10468 case OMPD_target_data: 10469 case OMPD_distribute: 10470 case OMPD_distribute_simd: 10471 case OMPD_distribute_parallel_for: 10472 case OMPD_distribute_parallel_for_simd: 
10473 case OMPD_teams_distribute: 10474 case OMPD_teams_distribute_simd: 10475 case OMPD_teams_distribute_parallel_for: 10476 case OMPD_teams_distribute_parallel_for_simd: 10477 case OMPD_declare_simd: 10478 case OMPD_declare_variant: 10479 case OMPD_begin_declare_variant: 10480 case OMPD_end_declare_variant: 10481 case OMPD_declare_target: 10482 case OMPD_end_declare_target: 10483 case OMPD_declare_reduction: 10484 case OMPD_declare_mapper: 10485 case OMPD_taskloop: 10486 case OMPD_taskloop_simd: 10487 case OMPD_master_taskloop: 10488 case OMPD_master_taskloop_simd: 10489 case OMPD_parallel_master_taskloop: 10490 case OMPD_parallel_master_taskloop_simd: 10491 case OMPD_target: 10492 case OMPD_target_simd: 10493 case OMPD_target_teams_distribute: 10494 case OMPD_target_teams_distribute_simd: 10495 case OMPD_target_teams_distribute_parallel_for: 10496 case OMPD_target_teams_distribute_parallel_for_simd: 10497 case OMPD_target_teams: 10498 case OMPD_target_parallel: 10499 case OMPD_target_parallel_for: 10500 case OMPD_target_parallel_for_simd: 10501 case OMPD_requires: 10502 case OMPD_unknown: 10503 default: 10504 llvm_unreachable("Unexpected standalone target data directive."); 10505 break; 10506 } 10507 CGF.EmitRuntimeCall( 10508 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10509 OffloadingArgs); 10510 }; 10511 10512 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10513 CodeGenFunction &CGF, PrePostActionTy &) { 10514 // Fill up the arrays with all the mapped variables. 10515 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10516 10517 // Get map clause information. 10518 MappableExprsHandler MEHandler(D, CGF); 10519 MEHandler.generateAllInfo(CombinedInfo); 10520 10521 TargetDataInfo Info; 10522 // Fill up the arrays and create the arguments. 
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    // With a depend or nowait clause the runtime call must be wrapped in an
    // outer task so it can execute asynchronously.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

/// Computes the size (in bits) of the "characteristic data type" (CDT) of
/// \p FD, used to derive the vector length of a 'declare simd' variant when
/// no 'simdlen' clause was given. Returns 0 for an invalid return type.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For an instance method, position 0 is the implicit 'this' parameter;
    // its CDT candidate is a pointer to the enclosing class.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Rule b): first non-uniform, non-linear ("vector") parameter.
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Rules c) and d): aggregates and "no candidate" both fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emits the x86 'declare simd' mangled variant names
/// ("_ZGV<isa><mask><vlen><parameters>_<name>") as attributes on \p Fn, one
/// per (mask, ISA) combination.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  // 'N' = unmasked variant, 'M' = masked variant; [not]inbranch selects which
  // of the two (or both, when unspecified) are emitted.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No 'simdlen' clause: derive VLEN from the register width and the
        // characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
10701 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10702 QT = QT.getCanonicalType(); 10703 10704 if (QT->isVoidType()) 10705 return false; 10706 10707 if (Kind == ParamKindTy::Uniform) 10708 return false; 10709 10710 if (Kind == ParamKindTy::Linear) 10711 return false; 10712 10713 // TODO: Handle linear references with modifiers 10714 10715 if (Kind == ParamKindTy::LinearWithVarStride) 10716 return false; 10717 10718 return true; 10719 } 10720 10721 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10722 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10723 QT = QT.getCanonicalType(); 10724 unsigned Size = C.getTypeSize(QT); 10725 10726 // Only scalars and complex within 16 bytes wide set PVB to true. 10727 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10728 return false; 10729 10730 if (QT->isFloatingType()) 10731 return true; 10732 10733 if (QT->isIntegerType()) 10734 return true; 10735 10736 if (QT->isPointerType()) 10737 return true; 10738 10739 // TODO: Add support for complex types (section 3.1.2, item 2). 10740 10741 return false; 10742 } 10743 10744 /// Computes the lane size (LS) of a return type or of an input parameter, 10745 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10746 /// TODO: Add support for references, section 3.2.1, item 1. 10747 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10748 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10749 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10750 if (getAArch64PBV(PTy, C)) 10751 return C.getTypeSize(PTy); 10752 } 10753 if (getAArch64PBV(QT, C)) 10754 return C.getTypeSize(QT); 10755 10756 return C.getTypeSize(C.getUIntPtrType()); 10757 } 10758 10759 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10760 // signature of the scalar function, as defined in 3.2.2 of the 10761 // AAVFABI. 
10762 static std::tuple<unsigned, unsigned, bool> 10763 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10764 QualType RetType = FD->getReturnType().getCanonicalType(); 10765 10766 ASTContext &C = FD->getASTContext(); 10767 10768 bool OutputBecomesInput = false; 10769 10770 llvm::SmallVector<unsigned, 8> Sizes; 10771 if (!RetType->isVoidType()) { 10772 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10773 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10774 OutputBecomesInput = true; 10775 } 10776 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10777 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10778 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10779 } 10780 10781 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10782 // The LS of a function parameter / return value can only be a power 10783 // of 2, starting from 8 bits, up to 128. 10784 assert(std::all_of(Sizes.begin(), Sizes.end(), 10785 [](unsigned Size) { 10786 return Size == 8 || Size == 16 || Size == 32 || 10787 Size == 64 || Size == 128; 10788 }) && 10789 "Invalid size"); 10790 10791 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10792 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10793 OutputBecomesInput); 10794 } 10795 10796 /// Mangle the parameter part of the vector function name according to 10797 /// their OpenMP classification. The mangling function is defined in 10798 /// section 3.5 of the AAVFABI. 10799 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10800 SmallString<256> Buffer; 10801 llvm::raw_svector_ostream Out(Buffer); 10802 for (const auto &ParamAttr : ParamAttrs) { 10803 switch (ParamAttr.Kind) { 10804 case LinearWithVarStride: 10805 Out << "ls" << ParamAttr.StrideOrArg; 10806 break; 10807 case Linear: 10808 Out << 'l'; 10809 // Don't print the step value if it is not present or if it is 10810 // equal to 1. 
10811 if (ParamAttr.StrideOrArg != 1) 10812 Out << ParamAttr.StrideOrArg; 10813 break; 10814 case Uniform: 10815 Out << 'u'; 10816 break; 10817 case Vector: 10818 Out << 'v'; 10819 break; 10820 } 10821 10822 if (!!ParamAttr.Alignment) 10823 Out << 'a' << ParamAttr.Alignment; 10824 } 10825 10826 return std::string(Out.str()); 10827 } 10828 10829 // Function used to add the attribute. The parameter `VLEN` is 10830 // templated to allow the use of "x" when targeting scalable functions 10831 // for SVE. 10832 template <typename T> 10833 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10834 char ISA, StringRef ParSeq, 10835 StringRef MangledName, bool OutputBecomesInput, 10836 llvm::Function *Fn) { 10837 SmallString<256> Buffer; 10838 llvm::raw_svector_ostream Out(Buffer); 10839 Out << Prefix << ISA << LMask << VLEN; 10840 if (OutputBecomesInput) 10841 Out << "v"; 10842 Out << ParSeq << "_" << MangledName; 10843 Fn->addFnAttr(Out.str()); 10844 } 10845 10846 // Helper function to generate the Advanced SIMD names depending on 10847 // the value of the NDS when simdlen is not present. 
// For a given NDS, emits the one or two Advanced SIMD vector-length variants
// mandated by section 3.3.1 of the AAVFABI: two lengths for NDS below 64 bits,
// a single 2-lane variant for 64/128-bit scalars.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 'n' for Advanced SIMD (NEON) or 's' for SVE; \p UserVLEN is the
/// 'simdlen' clause value, or 0 when the clause is absent.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, mangled "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
10971 switch (State) { 10972 case OMPDeclareSimdDeclAttr::BS_Undefined: 10973 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10974 OutputBecomesInput, Fn); 10975 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10976 OutputBecomesInput, Fn); 10977 break; 10978 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10979 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10980 OutputBecomesInput, Fn); 10981 break; 10982 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10983 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10984 OutputBecomesInput, Fn); 10985 break; 10986 } 10987 } 10988 } 10989 } 10990 10991 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10992 llvm::Function *Fn) { 10993 ASTContext &C = CGM.getContext(); 10994 FD = FD->getMostRecentDecl(); 10995 // Map params to their positions in function decl. 10996 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10997 if (isa<CXXMethodDecl>(FD)) 10998 ParamPositions.try_emplace(FD, 0); 10999 unsigned ParamPos = ParamPositions.size(); 11000 for (const ParmVarDecl *P : FD->parameters()) { 11001 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11002 ++ParamPos; 11003 } 11004 while (FD) { 11005 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11006 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11007 // Mark uniform parameters. 11008 for (const Expr *E : Attr->uniforms()) { 11009 E = E->IgnoreParenImpCasts(); 11010 unsigned Pos; 11011 if (isa<CXXThisExpr>(E)) { 11012 Pos = ParamPositions[FD]; 11013 } else { 11014 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11015 ->getCanonicalDecl(); 11016 Pos = ParamPositions[PVD]; 11017 } 11018 ParamAttrs[Pos].Kind = Uniform; 11019 } 11020 // Get alignment info. 
11021 auto NI = Attr->alignments_begin(); 11022 for (const Expr *E : Attr->aligneds()) { 11023 E = E->IgnoreParenImpCasts(); 11024 unsigned Pos; 11025 QualType ParmTy; 11026 if (isa<CXXThisExpr>(E)) { 11027 Pos = ParamPositions[FD]; 11028 ParmTy = E->getType(); 11029 } else { 11030 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11031 ->getCanonicalDecl(); 11032 Pos = ParamPositions[PVD]; 11033 ParmTy = PVD->getType(); 11034 } 11035 ParamAttrs[Pos].Alignment = 11036 (*NI) 11037 ? (*NI)->EvaluateKnownConstInt(C) 11038 : llvm::APSInt::getUnsigned( 11039 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11040 .getQuantity()); 11041 ++NI; 11042 } 11043 // Mark linear parameters. 11044 auto SI = Attr->steps_begin(); 11045 auto MI = Attr->modifiers_begin(); 11046 for (const Expr *E : Attr->linears()) { 11047 E = E->IgnoreParenImpCasts(); 11048 unsigned Pos; 11049 // Rescaling factor needed to compute the linear parameter 11050 // value in the mangled name. 11051 unsigned PtrRescalingFactor = 1; 11052 if (isa<CXXThisExpr>(E)) { 11053 Pos = ParamPositions[FD]; 11054 } else { 11055 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11056 ->getCanonicalDecl(); 11057 Pos = ParamPositions[PVD]; 11058 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11059 PtrRescalingFactor = CGM.getContext() 11060 .getTypeSizeInChars(P->getPointeeType()) 11061 .getQuantity(); 11062 } 11063 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11064 ParamAttr.Kind = Linear; 11065 // Assuming a stride of 1, for `linear` without modifiers. 
11066 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11067 if (*SI) { 11068 Expr::EvalResult Result; 11069 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11070 if (const auto *DRE = 11071 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11072 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11073 ParamAttr.Kind = LinearWithVarStride; 11074 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11075 ParamPositions[StridePVD->getCanonicalDecl()]); 11076 } 11077 } 11078 } else { 11079 ParamAttr.StrideOrArg = Result.Val.getInt(); 11080 } 11081 } 11082 // If we are using a linear clause on a pointer, we need to 11083 // rescale the value of linear_step with the byte size of the 11084 // pointee type. 11085 if (Linear == ParamAttr.Kind) 11086 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11087 ++SI; 11088 ++MI; 11089 } 11090 llvm::APSInt VLENVal; 11091 SourceLocation ExprLoc; 11092 const Expr *VLENExpr = Attr->getSimdlen(); 11093 if (VLENExpr) { 11094 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11095 ExprLoc = VLENExpr->getExprLoc(); 11096 } 11097 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11098 if (CGM.getTriple().isX86()) { 11099 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11100 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11101 unsigned VLEN = VLENVal.getExtValue(); 11102 StringRef MangledName = Fn->getName(); 11103 if (CGM.getTarget().hasFeature("sve")) 11104 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11105 MangledName, 's', 128, Fn, ExprLoc); 11106 if (CGM.getTarget().hasFeature("neon")) 11107 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11108 MangledName, 'n', 128, Fn, ExprLoc); 11109 } 11110 } 11111 FD = FD->getPreviousDecl(); 11112 } 11113 } 11114 11115 namespace { 11116 /// Cleanup action for doacross support. 
// RAII-style EH cleanup that emits the __kmpc_doacross_fini call when the
// loop region is left (normally or via an exception).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // The (loc, gtid) arguments for the fini call, captured by value because
  // the cleanup may be emitted after the originating scope is gone.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

// Emits __kmpc_doacross_init for an 'ordered(n)' loop nest and registers a
// cleanup that emits the matching __kmpc_doacross_fini. One kmp_dim entry is
// filled per loop dimension (upper bound and stride; lower bound stays 0).
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini on scope exit (normal and EH
  // paths).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

// Emits the runtime call for an 'ordered depend(source)'/'depend(sink)'
// construct: the loop counters of the clause are materialized into a
// kmp_int64 array and passed to __kmpc_doacross_post or __kmpc_doacross_wait.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

// Emits a call to \p Callee at \p Loc with an artificial debug location,
// using the nounwind fast path when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation
Loc, llvm::FunctionCallee OutlinedFn, 11261 ArrayRef<llvm::Value *> Args) const { 11262 emitCall(CGF, Loc, OutlinedFn, Args); 11263 } 11264 11265 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11266 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11267 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11268 HasEmittedDeclareTargetRegion = true; 11269 } 11270 11271 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11272 const VarDecl *NativeParam, 11273 const VarDecl *TargetParam) const { 11274 return CGF.GetAddrOfLocalVar(NativeParam); 11275 } 11276 11277 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11278 const VarDecl *VD) { 11279 if (!VD) 11280 return Address::invalid(); 11281 Address UntiedAddr = Address::invalid(); 11282 Address UntiedRealAddr = Address::invalid(); 11283 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11284 if (It != FunctionToUntiedTaskStackMap.end()) { 11285 const UntiedLocalVarsAddressesMap &UntiedData = 11286 UntiedLocalVarsStack[It->second]; 11287 auto I = UntiedData.find(VD); 11288 if (I != UntiedData.end()) { 11289 UntiedAddr = I->second.first; 11290 UntiedRealAddr = I->second.second; 11291 } 11292 } 11293 const VarDecl *CVD = VD->getCanonicalDecl(); 11294 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11295 // Use the default allocation. 
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // In an untied task, remember the allocated pointer in the persistent
    // task-local slot so later task parts can find it again.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as a raw encoding; decoded again in Emit().
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        // The allocator expression is re-emitted at cleanup time.
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD has persistent task-local storage registered for the
/// untied task represented by the current function.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII: while in scope, records the decls named in 'nontemporal' clauses of
/// \p S so isNontemporalDecl() can answer queries during loop body codegen.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Non-DeclRefExpr references must be members of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
            ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// RAII: registers persistent local-variable addresses for an untied task
/// body (keyed by the task outlined function) for the duration of its codegen.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Map the current function to the stack slot about to be pushed.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Returns true if \p VD is named in any active 'nontemporal' clause scope.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collects, into \p NeedToAddForLPCsAsDisabled, the lastprivate-conditional
/// candidates that must NOT be analyzed inside directive \p S: variables
/// captured into target/task regions and variables privatized by \p S that
/// are currently tracked (and not already disabled) on the LPC stack.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same exclusion for firstprivate clauses.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same exclusion for lastprivate clauses.
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same exclusion for reduction clauses.
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same exclusion for linear clauses.
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Keep only candidates tracked (and not already disabled) by the innermost
  // enclosing lastprivate-conditional region.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// RAII "push" constructor: if \p S has any lastprivate(conditional) clause
/// (OpenMP >= 5.0), pushes a tracking record mapping each conditional decl to
/// a unique global name ("pl_cond" prefix) plus the loop IV lvalue.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ?
                 ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Each conditional lastprivate decl gets a unique "pl_cond..." name used
    // for the backing internal globals.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// RAII "disable" constructor: pushes a Disabled record for the decls whose
/// lastprivate-conditional analysis must be suppressed inside \p S (see
/// tryToDisableInnerAnalysis).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; only membership in
    // DeclToUniqueName is checked.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of record this RAII pushed (if any).
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Creates (or reuses) the per-function wrapper struct
/// { <decl type>, char Fired } for a lastprivate(conditional) variable,
/// resets Fired to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // NOTE: "lasprivate.conditional" (sic) is the historical record name;
    // changing it would change emitted IR type names.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0 — no store to the variable has been observed yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc appears never assigned or read in this class —
  // candidate for removal; confirm against full file before deleting.
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-first; a Disabled record suppresses the match.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse only into glvalue sub-expressions (potential variable refs).
    for (const Stmt
         *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits the conditional-lastprivate bookkeeping for one store to the
/// variable: inside a critical section (named after the unique decl name),
/// if the current iteration is >= the last recorded one, record the iteration
/// and copy the private value into the internal "last value" global.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// Called after codegen of an assignment whose LHS is \p LHS: if the LHS
/// references a tracked lastprivate(conditional) variable, emits either the
/// full update (same function) or sets the Fired flag of the outer region's
/// wrapper struct (inner parallel region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
11807 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11808 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11809 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11810 "Lastprivate conditional is not found in outer region."); 11811 QualType StructTy = std::get<0>(It->getSecond()); 11812 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11813 LValue PrivLVal = CGF.EmitLValue(FoundE); 11814 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11815 PrivLVal.getAddress(CGF), 11816 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11817 LValue BaseLVal = 11818 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11819 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11820 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11821 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11822 FiredLVal, llvm::AtomicOrdering::Unordered, 11823 /*IsVolatile=*/true, /*isInit=*/false); 11824 return; 11825 } 11826 11827 // Private address of the lastprivate conditional in the current context. 
11828 // priv_a 11829 LValue LVal = CGF.EmitLValue(FoundE); 11830 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11831 FoundE->getExprLoc()); 11832 } 11833 11834 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11835 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11836 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11837 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11838 return; 11839 auto Range = llvm::reverse(LastprivateConditionalStack); 11840 auto It = llvm::find_if( 11841 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11842 if (It == Range.end() || It->Fn != CGF.CurFn) 11843 return; 11844 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11845 assert(LPCI != LastprivateConditionalToTypes.end() && 11846 "Lastprivates must be registered already."); 11847 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11848 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11849 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11850 for (const auto &Pair : It->DeclToUniqueName) { 11851 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11852 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11853 continue; 11854 auto I = LPCI->getSecond().find(Pair.first); 11855 assert(I != LPCI->getSecond().end() && 11856 "Lastprivate must be rehistered already."); 11857 // bool Cmp = priv_a.Fired != 0; 11858 LValue BaseLVal = std::get<3>(I->getSecond()); 11859 LValue FiredLVal = 11860 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11861 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11862 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11863 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11864 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11865 // if (Cmp) { 11866 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11867 CGF.EmitBlock(ThenBB); 
11868 Address Addr = CGF.GetAddrOfLocalVar(VD); 11869 LValue LVal; 11870 if (VD->getType()->isReferenceType()) 11871 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11872 AlignmentSource::Decl); 11873 else 11874 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11875 AlignmentSource::Decl); 11876 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11877 D.getBeginLoc()); 11878 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11879 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11880 // } 11881 } 11882 } 11883 11884 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11885 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11886 SourceLocation Loc) { 11887 if (CGF.getLangOpts().OpenMP < 50) 11888 return; 11889 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11890 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11891 "Unknown lastprivate conditional variable."); 11892 StringRef UniqueName = It->second; 11893 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11894 // The variable was not updated in the region - exit. 
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: the -fopenmp-simd runtime. Only 'simd' constructs are
// allowed in this mode, so every entry point that would require the host
// runtime library is unreachable; emitReduction is the one real override.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// The only reduction shape reachable in SIMD-only mode is the "simple"
// one, which the base implementation handles without runtime calls.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12099 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12100 SourceLocation Loc, 12101 llvm::Value *ReductionsPtr, 12102 LValue SharedLVal) { 12103 llvm_unreachable("Not supported in SIMD-only mode"); 12104 } 12105 12106 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12107 SourceLocation Loc) { 12108 llvm_unreachable("Not supported in SIMD-only mode"); 12109 } 12110 12111 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12112 CodeGenFunction &CGF, SourceLocation Loc, 12113 OpenMPDirectiveKind CancelRegion) { 12114 llvm_unreachable("Not supported in SIMD-only mode"); 12115 } 12116 12117 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12118 SourceLocation Loc, const Expr *IfCond, 12119 OpenMPDirectiveKind CancelRegion) { 12120 llvm_unreachable("Not supported in SIMD-only mode"); 12121 } 12122 12123 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12124 const OMPExecutableDirective &D, StringRef ParentName, 12125 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12126 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12127 llvm_unreachable("Not supported in SIMD-only mode"); 12128 } 12129 12130 void CGOpenMPSIMDRuntime::emitTargetCall( 12131 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12132 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12133 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12134 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12135 const OMPLoopDirective &D)> 12136 SizeEmitter) { 12137 llvm_unreachable("Not supported in SIMD-only mode"); 12138 } 12139 12140 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12141 llvm_unreachable("Not supported in SIMD-only mode"); 12142 } 12143 12144 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12145 llvm_unreachable("Not supported in SIMD-only mode"); 12146 } 12147 12148 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12149 return false; 12150 } 12151 12152 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12153 const OMPExecutableDirective &D, 12154 SourceLocation Loc, 12155 llvm::Function *OutlinedFn, 12156 ArrayRef<llvm::Value *> CapturedVars) { 12157 llvm_unreachable("Not supported in SIMD-only mode"); 12158 } 12159 12160 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12161 const Expr *NumTeams, 12162 const Expr *ThreadLimit, 12163 SourceLocation Loc) { 12164 llvm_unreachable("Not supported in SIMD-only mode"); 12165 } 12166 12167 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12168 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12169 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12170 llvm_unreachable("Not supported in SIMD-only mode"); 12171 } 12172 12173 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12174 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12175 const Expr *Device) { 12176 llvm_unreachable("Not supported in SIMD-only mode"); 12177 } 12178 12179 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12180 const OMPLoopDirective &D, 12181 ArrayRef<Expr *> NumIterations) { 12182 llvm_unreachable("Not supported in SIMD-only mode"); 12183 } 12184 12185 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12186 const OMPDependClause *C) { 12187 llvm_unreachable("Not supported in SIMD-only mode"); 12188 } 12189 12190 const VarDecl * 12191 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12192 const VarDecl *NativeParam) const { 12193 llvm_unreachable("Not supported in SIMD-only mode"); 12194 } 12195 12196 Address 12197 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12198 const VarDecl *NativeParam, 12199 const VarDecl *TargetParam) const { 12200 llvm_unreachable("Not supported in SIMD-only mode"); 12201 } 12202