1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 918 if (needCleanups(N)) { 919 PrivateAddr = CGF.Builder.CreateElementBitCast( 920 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 921 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 922 } 923 } 924 925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 926 LValue BaseLV) { 927 BaseTy = BaseTy.getNonReferenceType(); 928 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 929 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 930 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 931 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 932 } else { 933 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 934 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 935 } 936 BaseTy = BaseTy->getPointeeType(); 937 } 938 return CGF.MakeAddrLValue( 939 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 940 CGF.ConvertTypeForMem(ElTy)), 941 BaseLV.getType(), BaseLV.getBaseInfo(), 942 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 943 } 944 945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 946 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 947 llvm::Value *Addr) { 948 Address Tmp = Address::invalid(); 949 Address TopTmp = Address::invalid(); 950 Address MostTopTmp = Address::invalid(); 951 BaseTy = BaseTy.getNonReferenceType(); 952 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 953 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 954 Tmp = CGF.CreateMemTemp(BaseTy); 955 if (TopTmp.isValid()) 956 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 957 else 958 MostTopTmp = Tmp; 959 TopTmp = Tmp; 960 BaseTy = BaseTy->getPointeeType(); 961 } 962 llvm::Type *Ty = BaseLVType; 963 if (Tmp.isValid()) 964 Ty = Tmp.getElementType(); 965 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 966 if (Tmp.isValid()) { 967 CGF.Builder.CreateStore(Addr, Tmp); 968 return MostTopTmp; 969 } 970 return 
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
1039 CGF.EHStack.pushTerminate(); 1040 CodeGen(CGF); 1041 CGF.EHStack.popTerminate(); 1042 } 1043 1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1045 CodeGenFunction &CGF) { 1046 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1047 getThreadIDVariable()->getType(), 1048 AlignmentSource::Decl); 1049 } 1050 1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1052 QualType FieldTy) { 1053 auto *Field = FieldDecl::Create( 1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1057 Field->setAccess(AS_public); 1058 DC->addDecl(Field); 1059 return Field; 1060 } 1061 1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1063 StringRef Separator) 1064 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1065 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1066 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1067 1068 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1069 OMPBuilder.initialize(); 1070 loadOffloadInfoMetadata(); 1071 } 1072 1073 void CGOpenMPRuntime::clear() { 1074 InternalVars.clear(); 1075 // Clean non-target variable declarations possibly used only in debug info. 
1076 for (const auto &Data : EmittedNonTargetVariables) { 1077 if (!Data.getValue().pointsToAliveValue()) 1078 continue; 1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1080 if (!GV) 1081 continue; 1082 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1083 continue; 1084 GV->eraseFromParent(); 1085 } 1086 } 1087 1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1089 SmallString<128> Buffer; 1090 llvm::raw_svector_ostream OS(Buffer); 1091 StringRef Sep = FirstSeparator; 1092 for (StringRef Part : Parts) { 1093 OS << Sep << Part; 1094 Sep = Separator; 1095 } 1096 return std::string(OS.str()); 1097 } 1098 1099 static llvm::Function * 1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1101 const Expr *CombinerInitializer, const VarDecl *In, 1102 const VarDecl *Out, bool IsCombiner) { 1103 // void .omp_combiner.(Ty *in, Ty *out); 1104 ASTContext &C = CGM.getContext(); 1105 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1106 FunctionArgList Args; 1107 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1110 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1111 Args.push_back(&OmpOutParm); 1112 Args.push_back(&OmpInParm); 1113 const CGFunctionInfo &FnInfo = 1114 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1115 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1116 std::string Name = CGM.getOpenMPRuntime().getName( 1117 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1118 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1119 Name, &CGM.getModule()); 1120 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1121 if (CGM.getLangOpts().Optimize) { 1122 Fn->removeFnAttr(llvm::Attribute::NoInline); 1123 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1124 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1125 } 1126 CodeGenFunction CGF(CGM); 1127 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1128 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1129 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1130 Out->getLocation()); 1131 CodeGenFunction::OMPPrivateScope Scope(CGF); 1132 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1133 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1134 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1135 .getAddress(CGF); 1136 }); 1137 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1138 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1139 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1140 .getAddress(CGF); 1141 }); 1142 (void)Scope.Privatize(); 1143 if (!IsCombiner && Out->hasInit() && 1144 !CGF.isTrivialInitializer(Out->getInit())) { 1145 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1146 Out->getType().getQualifiers(), 1147 /*IsInitializer=*/true); 1148 } 1149 if (CombinerInitializer) 1150 CGF.EmitIgnoredExpr(CombinerInitializer); 1151 Scope.ForceCleanup(); 1152 CGF.FinishFunction(); 1153 return Fn; 1154 } 1155 1156 void CGOpenMPRuntime::emitUserDefinedReduction( 1157 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1158 if (UDRMap.count(D) > 0) 1159 return; 1160 llvm::Function *Combiner = emitCombinerOrInitializer( 1161 CGM, D->getType(), D->getCombiner(), 1162 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1163 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1164 /*IsCombiner=*/true); 1165 llvm::Function *Initializer = nullptr; 1166 if (const Expr *Init = D->getInitializer()) { 1167 Initializer = emitCombinerOrInitializer( 1168 CGM, D->getType(), 1169 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1170 : nullptr, 1171 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1173 /*IsCombiner=*/false); 1174 } 1175 UDRMap.try_emplace(D, Combiner, Initializer); 1176 if (CGF) { 1177 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1178 Decls.second.push_back(D); 1179 } 1180 } 1181 1182 std::pair<llvm::Function *, llvm::Function *> 1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1184 auto I = UDRMap.find(D); 1185 if (I != UDRMap.end()) 1186 return I->second; 1187 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1188 return UDRMap.lookup(D); 1189 } 1190 1191 namespace { 1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1193 // Builder if one is present. 1194 struct PushAndPopStackRAII { 1195 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1196 bool HasCancel) 1197 : OMPBuilder(OMPBuilder) { 1198 if (!OMPBuilder) 1199 return; 1200 1201 // The following callback is the crucial part of clangs cleanup process. 1202 // 1203 // NOTE: 1204 // Once the OpenMPIRBuilder is used to create parallel regions (and 1205 // similar), the cancellation destination (Dest below) is determined via 1206 // IP. That means if we have variables to finalize we split the block at IP, 1207 // use the new block (=BB) as destination to build a JumpDest (via 1208 // getJumpDestInCurrentScope(BB)) which then is fed to 1209 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1210 // to push & pop an FinalizationInfo object. 
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel(); 1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1256 HasCancel = OPFD->hasCancel(); 1257 else if (const auto *OPFD = 1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1259 HasCancel = OPFD->hasCancel(); 1260 else if (const auto *OPFD = 1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1265 // parallel region to make cancellation barriers work properly. 1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1269 HasCancel, OutlinedHelperName); 1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1272 } 1273 1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1275 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1276 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1278 return emitParallelOrTeamsOutlinedFunction( 1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1280 } 1281 1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1283 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1285 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1286 return emitParallelOrTeamsOutlinedFunction( 1287 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1288 } 1289 1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1292 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1293 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1294 bool Tied, unsigned &NumberOfParts) { 1295 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1296 PrePostActionTy &) { 1297 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1298 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1299 llvm::Value *TaskArgs[] = { 1300 UpLoc, ThreadID, 1301 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1302 TaskTVar->getType()->castAs<PointerType>()) 1303 .getPointer(CGF)}; 1304 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1305 CGM.getModule(), OMPRTL___kmpc_omp_task), 1306 TaskArgs); 1307 }; 1308 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1309 UntiedCodeGen); 1310 CodeGen.setAction(Action); 1311 assert(!ThreadIDVar->getType()->isPointerType() && 1312 "thread id variable must be of type kmp_int32 for tasks"); 1313 const OpenMPDirectiveKind Region = 1314 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1315 : OMPD_task; 1316 const CapturedStmt *CS = D.getCapturedStmt(Region); 1317 bool HasCancel = false; 1318 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1319 HasCancel = TD->hasCancel(); 1320 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1321 HasCancel = TD->hasCancel(); 1322 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1323 HasCancel = TD->hasCancel(); 1324 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 1327 CodeGenFunction CGF(CGM, true); 1328 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1329 InnermostKind, HasCancel, Action); 1330 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1331 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1332 if (!Tied) 1333 NumberOfParts = Action.getNumberOfParts(); 1334 return Res; 1335 } 1336 1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1338 
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1436 Line, Column); 1437 } 1438 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1439 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1440 Reserved2Flags); 1441 } 1442 1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1444 SourceLocation Loc) { 1445 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1446 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1447 // the clang invariants used below might be broken. 1448 if (CGM.getLangOpts().OpenMPIRBuilder) { 1449 SmallString<128> Buffer; 1450 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1451 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1452 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1453 return OMPBuilder.getOrCreateThreadID( 1454 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1455 } 1456 1457 llvm::Value *ThreadID = nullptr; 1458 // Check whether we've already cached a load of the thread id in this 1459 // function. 1460 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1461 if (I != OpenMPLocThreadIDMap.end()) { 1462 ThreadID = I->second.ThreadID; 1463 if (ThreadID != nullptr) 1464 return ThreadID; 1465 } 1466 // If exceptions are enabled, do not use parameter to avoid possible crash. 1467 if (auto *OMPRegionInfo = 1468 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1469 if (OMPRegionInfo->getThreadIDVariable()) { 1470 // Check if this an outlined function with thread id passed as argument. 
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1539 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1540 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1541 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1542 } 1543 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1544 } 1545 1546 llvm::FunctionCallee 1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1548 assert((IVSize == 32 || IVSize == 64) && 1549 "IV size is not compatible with the omp runtime"); 1550 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1551 : "__kmpc_for_static_init_4u") 1552 : (IVSigned ? "__kmpc_for_static_init_8" 1553 : "__kmpc_for_static_init_8u"); 1554 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1555 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1556 llvm::Type *TypeParams[] = { 1557 getIdentTyPointerTy(), // loc 1558 CGM.Int32Ty, // tid 1559 CGM.Int32Ty, // schedtype 1560 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1561 PtrTy, // p_lower 1562 PtrTy, // p_upper 1563 PtrTy, // p_stride 1564 ITy, // incr 1565 ITy // chunk 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1579 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1581 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1582 CGM.Int32Ty, // tid 1583 CGM.Int32Ty, // schedtype 1584 ITy, // lower 1585 ITy, // upper 1586 ITy, // stride 1587 ITy // chunk 1588 }; 1589 auto *FnTy = 1590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1591 return CGM.CreateRuntimeFunction(FnTy, Name); 1592 } 1593 1594 llvm::FunctionCallee 1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1596 assert((IVSize == 32 || IVSize == 64) && 1597 "IV size is not compatible with the omp runtime"); 1598 StringRef Name = 1599 IVSize == 32 1600 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1601 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1602 llvm::Type *TypeParams[] = { 1603 getIdentTyPointerTy(), // loc 1604 CGM.Int32Ty, // tid 1605 }; 1606 auto *FnTy = 1607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1608 return CGM.CreateRuntimeFunction(FnTy, Name); 1609 } 1610 1611 llvm::FunctionCallee 1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1613 assert((IVSize == 32 || IVSize == 64) && 1614 "IV size is not compatible with the omp runtime"); 1615 StringRef Name = 1616 IVSize == 32 1617 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1618 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1619 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1621 llvm::Type *TypeParams[] = { 1622 getIdentTyPointerTy(), // loc 1623 CGM.Int32Ty, // tid 1624 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1625 PtrTy, // p_lower 1626 PtrTy, // p_upper 1627 PtrTy // p_stride 1628 }; 1629 auto *FnTy = 1630 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1631 return CGM.CreateRuntimeFunction(FnTy, Name); 1632 } 1633 1634 /// Obtain information that uniquely identifies a target entry. This 1635 /// consists of the file and device IDs as well as line number associated with 1636 /// the relevant entry source location. 1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1638 unsigned &DeviceID, unsigned &FileID, 1639 unsigned &LineNum) { 1640 SourceManager &SM = C.getSourceManager(); 1641 1642 // The loc should be always valid and have a file ID (the user cannot use 1643 // #pragma directives in macros) 1644 1645 assert(Loc.isValid() && "Source location is expected to be always valid."); 1646 1647 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1648 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1649 1650 llvm::sys::fs::UniqueID ID; 1651 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1652 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1653 << PLoc.getFilename() << EC.message(); 1654 1655 DeviceID = ID.getDevice(); 1656 FileID = ID.getFile(); 1657 LineNum = PLoc.getLine(); 1658 } 1659 1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1661 if (CGM.getLangOpts().OpenMPSimd) 1662 return Address::invalid(); 1663 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1664 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1665 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1666 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1667 HasRequiresUnifiedSharedMemory))) { 1668 SmallString<64> PtrName; 1669 { 1670 
llvm::raw_svector_ostream OS(PtrName); 1671 OS << CGM.getMangledName(GlobalDecl(VD)); 1672 if (!VD->isExternallyVisible()) { 1673 unsigned DeviceID, FileID, Line; 1674 getTargetEntryUniqueInfo(CGM.getContext(), 1675 VD->getCanonicalDecl()->getBeginLoc(), 1676 DeviceID, FileID, Line); 1677 OS << llvm::format("_%x", FileID); 1678 } 1679 OS << "_decl_tgt_ref_ptr"; 1680 } 1681 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1682 if (!Ptr) { 1683 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1684 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1685 PtrName); 1686 1687 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1688 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1689 1690 if (!CGM.getLangOpts().OpenMPIsDevice) 1691 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1692 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1693 } 1694 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1695 } 1696 return Address::invalid(); 1697 } 1698 1699 llvm::Constant * 1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1701 assert(!CGM.getLangOpts().OpenMPUseTLS || 1702 !CGM.getContext().getTargetInfo().isTLSSupported()); 1703 // Lookup the entry, lazily creating it if necessary. 
1704 std::string Suffix = getName({"cache", ""}); 1705 return getOrCreateInternalVariable( 1706 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1707 } 1708 1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1710 const VarDecl *VD, 1711 Address VDAddr, 1712 SourceLocation Loc) { 1713 if (CGM.getLangOpts().OpenMPUseTLS && 1714 CGM.getContext().getTargetInfo().isTLSSupported()) 1715 return VDAddr; 1716 1717 llvm::Type *VarTy = VDAddr.getElementType(); 1718 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1719 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1720 CGM.Int8PtrTy), 1721 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1722 getOrCreateThreadPrivateCache(VD)}; 1723 return Address(CGF.EmitRuntimeCall( 1724 OMPBuilder.getOrCreateRuntimeFunction( 1725 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1726 Args), 1727 VDAddr.getAlignment()); 1728 } 1729 1730 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1731 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1732 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1733 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1734 // library. 1735 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1736 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1737 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1738 OMPLoc); 1739 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1740 // to register constructor/destructor for variable. 
1741 llvm::Value *Args[] = { 1742 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1743 Ctor, CopyCtor, Dtor}; 1744 CGF.EmitRuntimeCall( 1745 OMPBuilder.getOrCreateRuntimeFunction( 1746 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1747 Args); 1748 } 1749 1750 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1751 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1752 bool PerformInit, CodeGenFunction *CGF) { 1753 if (CGM.getLangOpts().OpenMPUseTLS && 1754 CGM.getContext().getTargetInfo().isTLSSupported()) 1755 return nullptr; 1756 1757 VD = VD->getDefinition(CGM.getContext()); 1758 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1759 QualType ASTTy = VD->getType(); 1760 1761 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1762 const Expr *Init = VD->getAnyInitializer(); 1763 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1764 // Generate function that re-emits the declaration's initializer into the 1765 // threadprivate copy of the variable VD 1766 CodeGenFunction CtorCGF(CGM); 1767 FunctionArgList Args; 1768 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1769 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1770 ImplicitParamDecl::Other); 1771 Args.push_back(&Dst); 1772 1773 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1774 CGM.getContext().VoidPtrTy, Args); 1775 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1776 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1777 llvm::Function *Fn = 1778 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1779 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1780 Args, Loc, Loc); 1781 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1782 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1783 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1784 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1785 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1786 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1787 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1788 /*IsInitializer=*/true); 1789 ArgVal = CtorCGF.EmitLoadOfScalar( 1790 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1791 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1792 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1793 CtorCGF.FinishFunction(); 1794 Ctor = Fn; 1795 } 1796 if (VD->getType().isDestructedType() != QualType::DK_none) { 1797 // Generate function that emits destructor call for the threadprivate copy 1798 // of the variable VD 1799 CodeGenFunction DtorCGF(CGM); 1800 FunctionArgList Args; 1801 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1802 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1803 ImplicitParamDecl::Other); 1804 Args.push_back(&Dst); 1805 1806 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1807 CGM.getContext().VoidTy, Args); 1808 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1809 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1810 llvm::Function *Fn = 1811 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1812 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1813 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1814 Loc, Loc); 1815 // Create a scope with an artificial location for the body of this function. 1816 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1817 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1818 DtorCGF.GetAddrOfLocalVar(&Dst), 1819 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1820 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1821 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1822 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1823 DtorCGF.FinishFunction(); 1824 Dtor = Fn; 1825 } 1826 // Do not emit init function if it is not required. 
1827 if (!Ctor && !Dtor) 1828 return nullptr; 1829 1830 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1831 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1832 /*isVarArg=*/false) 1833 ->getPointerTo(); 1834 // Copying constructor for the threadprivate variable. 1835 // Must be NULL - reserved by runtime, but currently it requires that this 1836 // parameter is always NULL. Otherwise it fires assertion. 1837 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1838 if (Ctor == nullptr) { 1839 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1840 /*isVarArg=*/false) 1841 ->getPointerTo(); 1842 Ctor = llvm::Constant::getNullValue(CtorTy); 1843 } 1844 if (Dtor == nullptr) { 1845 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1846 /*isVarArg=*/false) 1847 ->getPointerTo(); 1848 Dtor = llvm::Constant::getNullValue(DtorTy); 1849 } 1850 if (!CGF) { 1851 auto *InitFunctionTy = 1852 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1853 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1854 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1855 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1856 CodeGenFunction InitCGF(CGM); 1857 FunctionArgList ArgList; 1858 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1859 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1860 Loc, Loc); 1861 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1862 InitCGF.FinishFunction(); 1863 return InitFunction; 1864 } 1865 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1866 } 1867 return nullptr; 1868 } 1869 1870 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1871 llvm::GlobalVariable *Addr, 1872 bool PerformInit) { 1873 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1874 !CGM.getLangOpts().OpenMPIsDevice) 1875 return false; 1876 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1877 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1878 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1879 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1880 HasRequiresUnifiedSharedMemory)) 1881 return CGM.getLangOpts().OpenMPIsDevice; 1882 VD = VD->getDefinition(CGM.getContext()); 1883 assert(VD && "Unknown VarDecl"); 1884 1885 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1886 return CGM.getLangOpts().OpenMPIsDevice; 1887 1888 QualType ASTTy = VD->getType(); 1889 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1890 1891 // Produce the unique prefix to identify the new target regions. We use 1892 // the source location of the variable declaration which we know to not 1893 // conflict with any target region. 1894 unsigned DeviceID; 1895 unsigned FileID; 1896 unsigned Line; 1897 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1898 SmallString<128> Buffer, Out; 1899 { 1900 llvm::raw_svector_ostream OS(Buffer); 1901 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1902 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1903 } 1904 1905 const Expr *Init = VD->getAnyInitializer(); 1906 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1907 llvm::Constant *Ctor; 1908 llvm::Constant *ID; 1909 if (CGM.getLangOpts().OpenMPIsDevice) { 1910 // Generate function that re-emits the declaration's initializer into 1911 // the threadprivate copy of the variable VD 1912 CodeGenFunction CtorCGF(CGM); 1913 1914 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1915 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1916 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1917 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1918 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1919 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1920 FunctionArgList(), Loc, Loc); 1921 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1922 CtorCGF.EmitAnyExprToMem(Init, 1923 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1924 Init->getType().getQualifiers(), 1925 /*IsInitializer=*/true); 1926 CtorCGF.FinishFunction(); 1927 Ctor = Fn; 1928 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1929 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1930 } else { 1931 Ctor = new llvm::GlobalVariable( 1932 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1933 llvm::GlobalValue::PrivateLinkage, 1934 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1935 ID = Ctor; 1936 } 1937 1938 // Register the information for the entry associated with the constructor. 1939 Out.clear(); 1940 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1941 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1942 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1943 } 1944 if (VD->getType().isDestructedType() != QualType::DK_none) { 1945 llvm::Constant *Dtor; 1946 llvm::Constant *ID; 1947 if (CGM.getLangOpts().OpenMPIsDevice) { 1948 // Generate function that emits destructor call for the threadprivate 1949 // copy of the variable VD 1950 CodeGenFunction DtorCGF(CGM); 1951 1952 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1953 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1954 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1955 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1956 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1957 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1958 FunctionArgList(), Loc, Loc); 1959 // Create a scope with an artificial location for the body of this 1960 // function. 
1961 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1962 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1963 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1964 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1965 DtorCGF.FinishFunction(); 1966 Dtor = Fn; 1967 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1968 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1969 } else { 1970 Dtor = new llvm::GlobalVariable( 1971 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1972 llvm::GlobalValue::PrivateLinkage, 1973 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1974 ID = Dtor; 1975 } 1976 // Register the information for the entry associated with the destructor. 1977 Out.clear(); 1978 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1979 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1980 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1981 } 1982 return CGM.getLangOpts().OpenMPIsDevice; 1983 } 1984 1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1986 QualType VarType, 1987 StringRef Name) { 1988 std::string Suffix = getName({"artificial", ""}); 1989 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1990 llvm::Value *GAddr = 1991 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1992 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1993 CGM.getTarget().isTLSSupported()) { 1994 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1995 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 1996 } 1997 std::string CacheSuffix = getName({"cache", ""}); 1998 llvm::Value *Args[] = { 1999 emitUpdateLocation(CGF, SourceLocation()), 2000 getThreadID(CGF, SourceLocation()), 2001 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2002 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2003 /*isSigned=*/false), 2004 
getOrCreateInternalVariable( 2005 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2006 return Address( 2007 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2008 CGF.EmitRuntimeCall( 2009 OMPBuilder.getOrCreateRuntimeFunction( 2010 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2011 Args), 2012 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2013 CGM.getContext().getTypeAlignInChars(VarType)); 2014 } 2015 2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2017 const RegionCodeGenTy &ThenGen, 2018 const RegionCodeGenTy &ElseGen) { 2019 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2020 2021 // If the condition constant folds and can be elided, try to avoid emitting 2022 // the condition and the dead arm of the if/else. 2023 bool CondConstant; 2024 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2025 if (CondConstant) 2026 ThenGen(CGF); 2027 else 2028 ElseGen(CGF); 2029 return; 2030 } 2031 2032 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2033 // emit the conditional branch. 2034 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2035 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2036 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2037 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2038 2039 // Emit the 'then' code. 2040 CGF.EmitBlock(ThenBlock); 2041 ThenGen(CGF); 2042 CGF.EmitBranch(ContBlock); 2043 // Emit the 'else' code if present. 2044 // There is no need to emit line number for unconditional branch. 2045 (void)ApplyDebugLocation::CreateEmpty(CGF); 2046 CGF.EmitBlock(ElseBlock); 2047 ElseGen(CGF); 2048 // There is no need to emit line number for unconditional branch. 2049 (void)ApplyDebugLocation::CreateEmpty(CGF); 2050 CGF.EmitBranch(ContBlock); 2051 // Emit the continuation block for code after the if. 
2052 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2053 } 2054 2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2056 llvm::Function *OutlinedFn, 2057 ArrayRef<llvm::Value *> CapturedVars, 2058 const Expr *IfCond) { 2059 if (!CGF.HaveInsertPoint()) 2060 return; 2061 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2062 auto &M = CGM.getModule(); 2063 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2064 this](CodeGenFunction &CGF, PrePostActionTy &) { 2065 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2066 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2067 llvm::Value *Args[] = { 2068 RTLoc, 2069 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2070 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2071 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2072 RealArgs.append(std::begin(Args), std::end(Args)); 2073 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2074 2075 llvm::FunctionCallee RTLFn = 2076 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2077 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2078 }; 2079 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2082 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2083 // Build calls: 2084 // __kmpc_serialized_parallel(&Loc, GTid); 2085 llvm::Value *Args[] = {RTLoc, ThreadID}; 2086 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2087 M, OMPRTL___kmpc_serialized_parallel), 2088 Args); 2089 2090 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2091 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2092 Address ZeroAddrBound = 2093 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2094 /*Name=*/".bound.zero.addr"); 2095 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2096 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2097 // ThreadId 
for serialized parallels is 0. 2098 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2099 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2100 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2101 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2102 2103 // __kmpc_end_serialized_parallel(&Loc, GTid); 2104 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2105 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2106 M, OMPRTL___kmpc_end_serialized_parallel), 2107 EndArgs); 2108 }; 2109 if (IfCond) { 2110 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2111 } else { 2112 RegionCodeGenTy ThenRCG(ThenGen); 2113 ThenRCG(CGF); 2114 } 2115 } 2116 2117 // If we're inside an (outlined) parallel region, use the region info's 2118 // thread-ID variable (it is passed in a first argument of the outlined function 2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2120 // regular serial code region, get thread ID by calling kmp_int32 2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2122 // return the address of that temp. 
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2124 SourceLocation Loc) { 2125 if (auto *OMPRegionInfo = 2126 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2127 if (OMPRegionInfo->getThreadIDVariable()) 2128 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2129 2130 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2131 QualType Int32Ty = 2132 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2133 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2134 CGF.EmitStoreOfScalar(ThreadID, 2135 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2136 2137 return ThreadIDTemp; 2138 } 2139 2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2141 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2142 SmallString<256> Buffer; 2143 llvm::raw_svector_ostream Out(Buffer); 2144 Out << Name; 2145 StringRef RuntimeName = Out.str(); 2146 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2147 if (Elem.second) { 2148 assert(Elem.second->getType()->getPointerElementType() == Ty && 2149 "OMP internal variable has different type than requested"); 2150 return &*Elem.second; 2151 } 2152 2153 return Elem.second = new llvm::GlobalVariable( 2154 CGM.getModule(), Ty, /*IsConstant*/ false, 2155 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2156 Elem.first(), /*InsertBefore=*/nullptr, 2157 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2158 } 2159 2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2161 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2162 std::string Name = getName({Prefix, "var"}); 2163 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2164 } 2165 2166 namespace { 2167 /// Common pre(post)-action for different OpenMP constructs. 
2168 class CommonActionTy final : public PrePostActionTy { 2169 llvm::FunctionCallee EnterCallee; 2170 ArrayRef<llvm::Value *> EnterArgs; 2171 llvm::FunctionCallee ExitCallee; 2172 ArrayRef<llvm::Value *> ExitArgs; 2173 bool Conditional; 2174 llvm::BasicBlock *ContBlock = nullptr; 2175 2176 public: 2177 CommonActionTy(llvm::FunctionCallee EnterCallee, 2178 ArrayRef<llvm::Value *> EnterArgs, 2179 llvm::FunctionCallee ExitCallee, 2180 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2181 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2182 ExitArgs(ExitArgs), Conditional(Conditional) {} 2183 void Enter(CodeGenFunction &CGF) override { 2184 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2185 if (Conditional) { 2186 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2187 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2188 ContBlock = CGF.createBasicBlock("omp_if.end"); 2189 // Generate the branch (If-stmt) 2190 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2191 CGF.EmitBlock(ThenBlock); 2192 } 2193 } 2194 void Done(CodeGenFunction &CGF) { 2195 // Emit the rest of blocks/branches 2196 CGF.EmitBranch(ContBlock); 2197 CGF.EmitBlock(ContBlock, true); 2198 } 2199 void Exit(CodeGenFunction &CGF) override { 2200 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2201 } 2202 }; 2203 } // anonymous namespace 2204 2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2206 StringRef CriticalName, 2207 const RegionCodeGenTy &CriticalOpGen, 2208 SourceLocation Loc, const Expr *Hint) { 2209 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2210 // CriticalOpGen(); 2211 // __kmpc_end_critical(ident_t *, gtid, Lock); 2212 // Prepare arguments and build a call to __kmpc_critical 2213 if (!CGF.HaveInsertPoint()) 2214 return; 2215 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2216 getCriticalRegionLock(CriticalName)}; 2217 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2218 std::end(Args)); 2219 if (Hint) { 2220 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2221 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2222 } 2223 CommonActionTy Action( 2224 OMPBuilder.getOrCreateRuntimeFunction( 2225 CGM.getModule(), 2226 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2227 EnterArgs, 2228 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2229 OMPRTL___kmpc_end_critical), 2230 Args); 2231 CriticalOpGen.setAction(Action); 2232 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2233 } 2234 2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2236 const RegionCodeGenTy &MasterOpGen, 2237 SourceLocation Loc) { 2238 if (!CGF.HaveInsertPoint()) 2239 return; 2240 // if(__kmpc_master(ident_t *, gtid)) { 2241 // MasterOpGen(); 2242 // __kmpc_end_master(ident_t *, gtid); 2243 // } 2244 // Prepare arguments and build a call to __kmpc_master 2245 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2246 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2247 CGM.getModule(), OMPRTL___kmpc_master), 2248 Args, 2249 OMPBuilder.getOrCreateRuntimeFunction( 2250 CGM.getModule(), OMPRTL___kmpc_end_master), 2251 Args, 2252 /*Conditional=*/true); 2253 MasterOpGen.setAction(Action); 2254 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2255 Action.Done(CGF); 2256 } 2257 2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2259 SourceLocation Loc) { 2260 if (!CGF.HaveInsertPoint()) 2261 return; 2262 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2263 OMPBuilder.createTaskyield(CGF.Builder); 2264 } else { 2265 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2266 llvm::Value *Args[] = { 2267 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2268 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2269 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2270 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2271 Args); 2272 } 2273 2274 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2275 Region->emitUntiedSwitch(CGF); 2276 } 2277 2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2279 const RegionCodeGenTy &TaskgroupOpGen, 2280 SourceLocation Loc) { 2281 if (!CGF.HaveInsertPoint()) 2282 return; 2283 // __kmpc_taskgroup(ident_t *, gtid); 2284 // TaskgroupOpGen(); 2285 // __kmpc_end_taskgroup(ident_t *, gtid); 2286 // Prepare arguments and build a call to __kmpc_taskgroup 2287 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2288 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2289 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2290 Args, 2291 OMPBuilder.getOrCreateRuntimeFunction( 2292 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2293 Args); 2294 TaskgroupOpGen.setAction(Action); 2295 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2296 } 2297 2298 /// Given an array of pointers to variables, project the address of a 2299 /// given variable. 2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2301 unsigned Index, const VarDecl *Var) { 2302 // Pull out the pointer to the variable. 
2303 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2304 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2305 2306 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2307 Addr = CGF.Builder.CreateElementBitCast( 2308 Addr, CGF.ConvertTypeForMem(Var->getType())); 2309 return Addr; 2310 } 2311 2312 static llvm::Value *emitCopyprivateCopyFunction( 2313 CodeGenModule &CGM, llvm::Type *ArgsType, 2314 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2315 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2316 SourceLocation Loc) { 2317 ASTContext &C = CGM.getContext(); 2318 // void copy_func(void *LHSArg, void *RHSArg); 2319 FunctionArgList Args; 2320 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2321 ImplicitParamDecl::Other); 2322 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2323 ImplicitParamDecl::Other); 2324 Args.push_back(&LHSArg); 2325 Args.push_back(&RHSArg); 2326 const auto &CGFI = 2327 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2328 std::string Name = 2329 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2330 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2331 llvm::GlobalValue::InternalLinkage, Name, 2332 &CGM.getModule()); 2333 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2334 Fn->setDoesNotRecurse(); 2335 CodeGenFunction CGF(CGM); 2336 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2337 // Dest = (void*[n])(LHSArg); 2338 // Src = (void*[n])(RHSArg); 2339 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2340 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2341 ArgsType), CGF.getPointerAlign()); 2342 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2343 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2344 ArgsType), CGF.getPointerAlign()); 2345 // *(Type0*)Dst[0] = 
*(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit an OpenMP 'single' region, including the optional copyprivate
/// broadcast that follows it.
///
/// The region body produced by \p SingleOpGen is bracketed by a conditional
/// __kmpc_single / __kmpc_end_single pair. When \p CopyprivateVars is
/// non-empty, a 32-bit 'did_it' flag is additionally maintained and passed,
/// together with an array of the variables' addresses and a generated copy
/// function, to __kmpc_copyprivate.
///
/// \param CGF Function being emitted; nothing is emitted if it has no insert
/// point.
/// \param SingleOpGen Generator for the body of the region.
/// \param Loc Source location used for the runtime location/thread-id values.
/// \param CopyprivateVars Expressions naming the copyprivate variables.
/// \param SrcExprs, DstExprs, AssignmentOps Forwarded to
/// emitCopyprivateCopyFunction; each must have the same number of elements as
/// \p CopyprivateVars (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what gates every copyprivate-related step below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the conditional part.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The list is a constant-size array of void* with one slot per variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Store the address of the I-th variable, cast to void*, into slot I.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): the expression lists are passed as (SrcExprs, DstExprs);
    // confirm this matches the parameter order of emitCopyprivateCopyFunction.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an OpenMP 'ordered' region.
///
/// When \p IsThreads is true the body produced by \p OrderedOpGen is bracketed
/// by calls to __kmpc_ordered and __kmpc_end_ordered; otherwise the body is
/// emitted as an inlined directive without those runtime calls. Nothing is
/// emitted if \p CGF has no insert point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  // No runtime bracketing requested: emit just the region body.
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags =
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2484 else if (Kind == OMPD_single) 2485 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2486 else if (Kind == OMPD_barrier) 2487 Flags = OMP_IDENT_BARRIER_EXPL; 2488 else 2489 Flags = OMP_IDENT_BARRIER_IMPL; 2490 return Flags; 2491 } 2492 2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2494 CodeGenFunction &CGF, const OMPLoopDirective &S, 2495 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2496 // Check if the loop directive is actually a doacross loop directive. In this 2497 // case choose static, 1 schedule. 2498 if (llvm::any_of( 2499 S.getClausesOfKind<OMPOrderedClause>(), 2500 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2501 ScheduleKind = OMPC_SCHEDULE_static; 2502 // Chunk size is 1 in this case. 2503 llvm::APInt ChunkSize(32, 1); 2504 ChunkExpr = IntegerLiteral::Create( 2505 CGF.getContext(), ChunkSize, 2506 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2507 SourceLocation()); 2508 } 2509 } 2510 2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2512 OpenMPDirectiveKind Kind, bool EmitChecks, 2513 bool ForceSimpleCall) { 2514 // Check if we should use the OMPBuilder 2515 auto *OMPRegionInfo = 2516 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2517 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2518 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2519 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2520 return; 2521 } 2522 2523 if (!CGF.HaveInsertPoint()) 2524 return; 2525 // Build call __kmpc_cancel_barrier(loc, thread_id); 2526 // Build call __kmpc_barrier(loc, thread_id); 2527 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2528 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2529 // thread_id); 2530 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2531 getThreadID(CGF, Loc)}; 2532 if (OMPRegionInfo) { 2533 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2534 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2535 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2536 OMPRTL___kmpc_cancel_barrier), 2537 Args); 2538 if (EmitChecks) { 2539 // if (__kmpc_cancel_barrier()) { 2540 // exit from construct; 2541 // } 2542 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2543 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2544 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2545 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2546 CGF.EmitBlock(ExitBB); 2547 // exit from construct; 2548 CodeGenFunction::JumpDest CancelDestination = 2549 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2550 CGF.EmitBranchThroughCleanup(CancelDestination); 2551 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2552 } 2553 return; 2554 } 2555 } 2556 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2557 CGM.getModule(), OMPRTL___kmpc_barrier), 2558 Args); 2559 } 2560 2561 /// Map the OpenMP loop schedule to the runtime enumeration. 2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2563 bool Chunked, bool Ordered) { 2564 switch (ScheduleKind) { 2565 case OMPC_SCHEDULE_static: 2566 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2567 : (Ordered ? OMP_ord_static : OMP_sch_static); 2568 case OMPC_SCHEDULE_dynamic: 2569 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2570 case OMPC_SCHEDULE_guided: 2571 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2572 case OMPC_SCHEDULE_runtime: 2573 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2574 case OMPC_SCHEDULE_auto: 2575 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2576 case OMPC_SCHEDULE_unknown: 2577 assert(!Chunked && "chunk was specified but schedule kind not known"); 2578 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2579 } 2580 llvm_unreachable("Unexpected runtime schedule"); 2581 } 2582 2583 /// Map the OpenMP distribute schedule to the runtime enumeration. 2584 static OpenMPSchedType 2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2586 // only static is allowed for dist_schedule 2587 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2588 } 2589 2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2591 bool Chunked) const { 2592 OpenMPSchedType Schedule = 2593 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2594 return Schedule == OMP_sch_static; 2595 } 2596 2597 bool CGOpenMPRuntime::isStaticNonchunked( 2598 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2599 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2600 return Schedule == OMP_dist_sch_static; 2601 } 2602 2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2604 bool Chunked) const { 2605 OpenMPSchedType Schedule = 2606 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2607 return Schedule == OMP_sch_static_chunked; 2608 } 2609 2610 bool CGOpenMPRuntime::isStaticChunked( 2611 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2612 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2613 return Schedule == OMP_dist_sch_static_chunked; 2614 } 2615 2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2617 OpenMPSchedType Schedule = 2618 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2619 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2620 return Schedule != OMP_sch_static; 2621 } 2622 2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2624 OpenMPScheduleClauseModifier M1, 2625 OpenMPScheduleClauseModifier M2) { 2626 int Modifier = 0; 2627 switch (M1) { 2628 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2629 Modifier = OMP_sch_modifier_monotonic; 2630 break; 2631 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2632 Modifier = OMP_sch_modifier_nonmonotonic; 2633 break; 2634 case OMPC_SCHEDULE_MODIFIER_simd: 2635 if (Schedule == OMP_sch_static_chunked) 2636 Schedule = OMP_sch_static_balanced_chunked; 2637 break; 2638 case OMPC_SCHEDULE_MODIFIER_last: 2639 case OMPC_SCHEDULE_MODIFIER_unknown: 2640 break; 2641 } 2642 switch (M2) { 2643 case OMPC_SCHEDULE_MODIFIER_monotonic: 2644 Modifier = OMP_sch_modifier_monotonic; 2645 break; 2646 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2647 Modifier = OMP_sch_modifier_nonmonotonic; 2648 break; 2649 case OMPC_SCHEDULE_MODIFIER_simd: 2650 if (Schedule == OMP_sch_static_chunked) 2651 Schedule = OMP_sch_static_balanced_chunked; 2652 break; 2653 case OMPC_SCHEDULE_MODIFIER_last: 2654 case OMPC_SCHEDULE_MODIFIER_unknown: 2655 break; 2656 } 2657 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2658 // If the static schedule kind is specified or if the ordered clause is 2659 // specified, and if the nonmonotonic modifier is not specified, the effect is 2660 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2661 // modifier is specified, the effect is as if the nonmonotonic modifier is 2662 // specified. 
2663 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2664 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2665 Schedule == OMP_sch_static_balanced_chunked || 2666 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2667 Schedule == OMP_dist_sch_static_chunked || 2668 Schedule == OMP_dist_sch_static)) 2669 Modifier = OMP_sch_modifier_nonmonotonic; 2670 } 2671 return Schedule | Modifier; 2672 } 2673 2674 void CGOpenMPRuntime::emitForDispatchInit( 2675 CodeGenFunction &CGF, SourceLocation Loc, 2676 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2677 bool Ordered, const DispatchRTInput &DispatchValues) { 2678 if (!CGF.HaveInsertPoint()) 2679 return; 2680 OpenMPSchedType Schedule = getRuntimeSchedule( 2681 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2682 assert(Ordered || 2683 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2684 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2685 Schedule != OMP_sch_static_balanced_chunked)); 2686 // Call __kmpc_dispatch_init( 2687 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2688 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2689 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2690 2691 // If the Chunk was not specified in the clause - use default value 1. 2692 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Emit the call to a static-init runtime function with an already prepared
/// location, thread id and callee.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param UpdateLocation Value passed as the 'loc' argument.
/// \param ThreadId Value passed as the 'tid' argument.
/// \param ForStaticInitFunction Runtime function to call.
/// \param Schedule Runtime schedule; asserted to be one of the static kinds.
/// \param M1, M2 Schedule modifiers folded into the schedule argument via
/// addMonoNonMonoModifier.
/// \param Values Pointers to the last-iteration flag, bounds and stride, plus
/// the induction-variable width and the optional chunk (null means "no chunk";
/// a constant 1 of width IVSize is passed then). Values.Ordered must be false
/// (asserted).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // Absence of a chunk must go together with a non-chunked schedule.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    // Conversely, an explicit chunk requires one of the chunked schedules.
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit static-schedule initialization for a worksharing directive.
///
/// The location is tagged OMP_IDENT_WORK_LOOP for loop directives and
/// OMP_IDENT_WORK_SECTIONS otherwise. \p DKind must be a worksharing
/// directive (asserted). The insert-point check is performed inside
/// emitForStaticInitCall, i.e. after the location and thread id values have
/// been requested here.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // The debug location is applied only from here on, so it covers the call
  // emitted below but not the location/thread-id values created above.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit static-schedule initialization for a 'distribute' construct.
///
/// The location is tagged OMP_IDENT_WORK_DISTRIBUTE and no schedule modifiers
/// are applied (both are passed as OMPC_SCHEDULE_MODIFIER_unknown).
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
OMP_IDENT_WORK_LOOP 2805 : OMP_IDENT_WORK_SECTIONS), 2806 getThreadID(CGF, Loc)}; 2807 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2808 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2809 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2810 Args); 2811 } 2812 2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2814 SourceLocation Loc, 2815 unsigned IVSize, 2816 bool IVSigned) { 2817 if (!CGF.HaveInsertPoint()) 2818 return; 2819 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2820 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2821 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2822 } 2823 2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2825 SourceLocation Loc, unsigned IVSize, 2826 bool IVSigned, Address IL, 2827 Address LB, Address UB, 2828 Address ST) { 2829 // Call __kmpc_dispatch_next( 2830 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2831 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2832 // kmp_int[32|64] *p_stride); 2833 llvm::Value *Args[] = { 2834 emitUpdateLocation(CGF, Loc), 2835 getThreadID(CGF, Loc), 2836 IL.getPointer(), // &isLastIter 2837 LB.getPointer(), // &Lower 2838 UB.getPointer(), // &Upper 2839 ST.getPointer() // &Stride 2840 }; 2841 llvm::Value *Call = 2842 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2843 return CGF.EmitScalarConversion( 2844 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2845 CGF.getContext().BoolTy, Loc); 2846 } 2847 2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2849 llvm::Value *NumThreads, 2850 SourceLocation Loc) { 2851 if (!CGF.HaveInsertPoint()) 2852 return; 2853 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2854 llvm::Value *Args[] = { 2855 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2856 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2857 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2858 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2859 Args); 2860 } 2861 2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2863 ProcBindKind ProcBind, 2864 SourceLocation Loc) { 2865 if (!CGF.HaveInsertPoint()) 2866 return; 2867 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2868 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2869 llvm::Value *Args[] = { 2870 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2871 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2872 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2873 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2874 Args); 2875 } 2876 2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2878 SourceLocation Loc, llvm::AtomicOrdering AO) { 2879 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2880 OMPBuilder.createFlush(CGF.Builder); 2881 } else { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Build call void __kmpc_flush(ident_t *loc) 2885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2886 CGM.getModule(), OMPRTL___kmpc_flush), 2887 emitUpdateLocation(CGF, Loc)); 2888 } 2889 } 2890 2891 namespace { 2892 /// Indexes of fields for type kmp_task_t. 2893 enum KmpTaskTFields { 2894 /// List of shared variables. 2895 KmpTaskTShareds, 2896 /// Task routine. 2897 KmpTaskTRoutine, 2898 /// Partition id for the untied tasks. 2899 KmpTaskTPartId, 2900 /// Function with call of destructors for private variables. 2901 Data1, 2902 /// Task priority. 2903 Data2, 2904 /// (Taskloops only) Lower bound. 2905 KmpTaskTLowerBound, 2906 /// (Taskloops only) Upper bound. 2907 KmpTaskTUpperBound, 2908 /// (Taskloops only) Stride. 2909 KmpTaskTStride, 2910 /// (Taskloops only) Is last iteration flag. 2911 KmpTaskTLastIter, 2912 /// (Taskloops only) Reduction data. 
2913 KmpTaskTReductions, 2914 }; 2915 } // anonymous namespace 2916 2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2918 return OffloadEntriesTargetRegion.empty() && 2919 OffloadEntriesDeviceGlobalVar.empty(); 2920 } 2921 2922 /// Initialize target region entry. 2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2924 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2925 StringRef ParentName, unsigned LineNum, 2926 unsigned Order) { 2927 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2928 "only required for the device " 2929 "code generation."); 2930 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2931 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2932 OMPTargetRegionEntryTargetRegion); 2933 ++OffloadingEntriesNum; 2934 } 2935 2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2937 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2938 StringRef ParentName, unsigned LineNum, 2939 llvm::Constant *Addr, llvm::Constant *ID, 2940 OMPTargetRegionEntryKind Flags) { 2941 // If we are emitting code for a target, the entry is already initialized, 2942 // only has to be registered. 2943 if (CGM.getLangOpts().OpenMPIsDevice) { 2944 // This could happen if the device compilation is invoked standalone. 
2945 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2946 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2947 OffloadingEntriesNum); 2948 auto &Entry = 2949 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2950 Entry.setAddress(Addr); 2951 Entry.setID(ID); 2952 Entry.setFlags(Flags); 2953 } else { 2954 if (Flags == 2955 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2956 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2957 /*IgnoreAddressId*/ true)) 2958 return; 2959 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2960 "Target region entry already registered!"); 2961 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2962 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2963 ++OffloadingEntriesNum; 2964 } 2965 } 2966 2967 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2968 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2969 bool IgnoreAddressId) const { 2970 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2971 if (PerDevice == OffloadEntriesTargetRegion.end()) 2972 return false; 2973 auto PerFile = PerDevice->second.find(FileID); 2974 if (PerFile == PerDevice->second.end()) 2975 return false; 2976 auto PerParentName = PerFile->second.find(ParentName); 2977 if (PerParentName == PerFile->second.end()) 2978 return false; 2979 auto PerLine = PerParentName->second.find(LineNum); 2980 if (PerLine == PerParentName->second.end()) 2981 return false; 2982 // Fail if this entry is already registered. 
2983 if (!IgnoreAddressId && 2984 (PerLine->second.getAddress() || PerLine->second.getID())) 2985 return false; 2986 return true; 2987 } 2988 2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2990 const OffloadTargetRegionEntryInfoActTy &Action) { 2991 // Scan all target region entries and perform the provided action. 2992 for (const auto &D : OffloadEntriesTargetRegion) 2993 for (const auto &F : D.second) 2994 for (const auto &P : F.second) 2995 for (const auto &L : P.second) 2996 Action(D.first, F.first, P.first(), L.first, L.second); 2997 } 2998 2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3000 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3001 OMPTargetGlobalVarEntryKind Flags, 3002 unsigned Order) { 3003 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3004 "only required for the device " 3005 "code generation."); 3006 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3007 ++OffloadingEntriesNum; 3008 } 3009 3010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3011 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3012 CharUnits VarSize, 3013 OMPTargetGlobalVarEntryKind Flags, 3014 llvm::GlobalValue::LinkageTypes Linkage) { 3015 if (CGM.getLangOpts().OpenMPIsDevice) { 3016 // This could happen if the device compilation is invoked standalone. 
if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every stored device global variable entry, passing the
/// entry's key (the variable name it was stored under) and the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emit one offload entry descriptor into the "omp_offloading_entries"
/// section.
///
/// The descriptor is a constant global struct holding, in order: \p ID cast to
/// void*, a pointer to an internal constant string with the name of \p Addr,
/// \p Size, \p Flags, and a trailing 32-bit zero.
///
/// \param ID Constant stored in the first field of the descriptor.
/// \param Addr Constant whose name is recorded in the descriptor and appended
/// to the descriptor's own symbol name.
/// \param Size Value of the size field.
/// \param Flags Value of the flags field.
/// \param Linkage NOTE(review): not referenced in this body; the descriptor is
/// always created with WeakAnyLinkage. Confirm whether that is intended.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field initializers, in descriptor order: ID, name, size, flags, zero.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
3102 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3103 return; 3104 3105 llvm::Module &M = CGM.getModule(); 3106 llvm::LLVMContext &C = M.getContext(); 3107 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3108 SourceLocation, StringRef>, 3109 16> 3110 OrderedEntries(OffloadEntriesInfoManager.size()); 3111 llvm::SmallVector<StringRef, 16> ParentFunctions( 3112 OffloadEntriesInfoManager.size()); 3113 3114 // Auxiliary methods to create metadata values and strings. 3115 auto &&GetMDInt = [this](unsigned V) { 3116 return llvm::ConstantAsMetadata::get( 3117 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3118 }; 3119 3120 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3121 3122 // Create the offloading info metadata node. 3123 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3124 3125 // Create function that emits metadata for each target region entry; 3126 auto &&TargetRegionMetadataEmitter = 3127 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3128 &GetMDString]( 3129 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3130 unsigned Line, 3131 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3132 // Generate metadata for target regions. Each entry of this metadata 3133 // contains: 3134 // - Entry 0 -> Kind of this type of metadata (0). 3135 // - Entry 1 -> Device ID of the file where the entry was identified. 3136 // - Entry 2 -> File ID of the file where the entry was identified. 3137 // - Entry 3 -> Mangled name of the function where the entry was 3138 // identified. 3139 // - Entry 4 -> Line in the file where the entry was identified. 3140 // - Entry 5 -> Order the entry was created. 3141 // The first element of the metadata node is the kind. 
3142 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3143 GetMDInt(FileID), GetMDString(ParentName), 3144 GetMDInt(Line), GetMDInt(E.getOrder())}; 3145 3146 SourceLocation Loc; 3147 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3148 E = CGM.getContext().getSourceManager().fileinfo_end(); 3149 I != E; ++I) { 3150 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3151 I->getFirst()->getUniqueID().getFile() == FileID) { 3152 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3153 I->getFirst(), Line, 1); 3154 break; 3155 } 3156 } 3157 // Save this entry in the right position of the ordered entries array. 3158 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3159 ParentFunctions[E.getOrder()] = ParentName; 3160 3161 // Add metadata to the named metadata node. 3162 MD->addOperand(llvm::MDNode::get(C, Ops)); 3163 }; 3164 3165 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3166 TargetRegionMetadataEmitter); 3167 3168 // Create function that emits metadata for each device global variable entry; 3169 auto &&DeviceGlobalVarMetadataEmitter = 3170 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3171 MD](StringRef MangledName, 3172 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3173 &E) { 3174 // Generate metadata for global variables. Each entry of this metadata 3175 // contains: 3176 // - Entry 0 -> Kind of this type of metadata (1). 3177 // - Entry 1 -> Mangled name of the variable. 3178 // - Entry 2 -> Declare target kind. 3179 // - Entry 3 -> Order the entry was created. 3180 // The first element of the metadata node is the kind. 3181 llvm::Metadata *Ops[] = { 3182 GetMDInt(E.getKind()), GetMDString(MangledName), 3183 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3184 3185 // Save this entry in the right position of the ordered entries array. 
3186 OrderedEntries[E.getOrder()] = 3187 std::make_tuple(&E, SourceLocation(), MangledName); 3188 3189 // Add metadata to the named metadata node. 3190 MD->addOperand(llvm::MDNode::get(C, Ops)); 3191 }; 3192 3193 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3194 DeviceGlobalVarMetadataEmitter); 3195 3196 for (const auto &E : OrderedEntries) { 3197 assert(std::get<0>(E) && "All ordered entries must exist!"); 3198 if (const auto *CE = 3199 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3200 std::get<0>(E))) { 3201 if (!CE->getID() || !CE->getAddress()) { 3202 // Do not blame the entry if the parent funtion is not emitted. 3203 StringRef FnName = ParentFunctions[CE->getOrder()]; 3204 if (!CGM.GetGlobalValue(FnName)) 3205 continue; 3206 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3207 DiagnosticsEngine::Error, 3208 "Offloading entry for target region in %0 is incorrect: either the " 3209 "address or the ID is invalid."); 3210 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3211 continue; 3212 } 3213 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3214 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3215 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3216 OffloadEntryInfoDeviceGlobalVar>( 3217 std::get<0>(E))) { 3218 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3219 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3220 CE->getFlags()); 3221 switch (Flags) { 3222 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3223 if (CGM.getLangOpts().OpenMPIsDevice && 3224 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3225 continue; 3226 if (!CE->getAddress()) { 3227 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3228 DiagnosticsEngine::Error, "Offloading entry for declare target " 3229 "variable %0 is incorrect: the " 3230 "address is invalid."); 3231 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3232 
continue; 3233 } 3234 // The vaiable has no definition - no need to add the entry. 3235 if (CE->getVarSize().isZero()) 3236 continue; 3237 break; 3238 } 3239 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3240 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3241 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3242 "Declaret target link address is set."); 3243 if (CGM.getLangOpts().OpenMPIsDevice) 3244 continue; 3245 if (!CE->getAddress()) { 3246 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3247 DiagnosticsEngine::Error, 3248 "Offloading entry for declare target variable is incorrect: the " 3249 "address is invalid."); 3250 CGM.getDiags().Report(DiagID); 3251 continue; 3252 } 3253 break; 3254 } 3255 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3256 CE->getVarSize().getQuantity(), Flags, 3257 CE->getLinkage()); 3258 } else { 3259 llvm_unreachable("Unsupported entry kind."); 3260 } 3261 } 3262 } 3263 3264 /// Loads all the offload entries information from the host IR 3265 /// metadata. 3266 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3267 // If we are in target mode, load the metadata from the host IR. This code has 3268 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3269 3270 if (!CGM.getLangOpts().OpenMPIsDevice) 3271 return; 3272 3273 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3274 return; 3275 3276 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3277 if (auto EC = Buf.getError()) { 3278 CGM.getDiags().Report(diag::err_cannot_open_file) 3279 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3280 return; 3281 } 3282 3283 llvm::LLVMContext C; 3284 auto ME = expectedToErrorOrAndEmitErrors( 3285 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3286 3287 if (auto EC = ME.getError()) { 3288 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3289 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3290 CGM.getDiags().Report(DiagID) 3291 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3292 return; 3293 } 3294 3295 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3296 if (!MD) 3297 return; 3298 3299 for (llvm::MDNode *MN : MD->operands()) { 3300 auto &&GetMDInt = [MN](unsigned Idx) { 3301 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3302 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3303 }; 3304 3305 auto &&GetMDString = [MN](unsigned Idx) { 3306 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3307 return V->getString(); 3308 }; 3309 3310 switch (GetMDInt(0)) { 3311 default: 3312 llvm_unreachable("Unexpected metadata!"); 3313 break; 3314 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3315 OffloadingEntryInfoTargetRegion: 3316 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3317 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3318 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3319 /*Order=*/GetMDInt(5)); 3320 break; 3321 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3322 OffloadingEntryInfoDeviceGlobalVar: 3323 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3324 /*MangledName=*/GetMDString(1), 3325 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3326 /*Flags=*/GetMDInt(2)), 3327 /*Order=*/GetMDInt(3)); 3328 break; 3329 } 3330 } 3331 } 3332 3333 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3334 if (!KmpRoutineEntryPtrTy) { 3335 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3336 ASTContext &C = CGM.getContext(); 3337 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3338 FunctionProtoType::ExtProtoInfo EPI; 3339 KmpRoutineEntryPtrQTy = C.getPointerType( 3340 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3341 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3342 } 3343 } 3344 3345 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3346 // Make sure the type of the entry is already created. This is the type we 3347 // have to create: 3348 // struct __tgt_offload_entry{ 3349 // void *addr; // Pointer to the offload entry info. 3350 // // (function or global) 3351 // char *name; // Name of the function or global. 3352 // size_t size; // Size of the entry info (0 if it a function). 3353 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3354 // int32_t reserved; // Reserved, to use by the runtime library. 
3355 // }; 3356 if (TgtOffloadEntryQTy.isNull()) { 3357 ASTContext &C = CGM.getContext(); 3358 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3359 RD->startDefinition(); 3360 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3361 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3362 addFieldToRecordDecl(C, RD, C.getSizeType()); 3363 addFieldToRecordDecl( 3364 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3365 addFieldToRecordDecl( 3366 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3367 RD->completeDefinition(); 3368 RD->addAttr(PackedAttr::CreateImplicit(C)); 3369 TgtOffloadEntryQTy = C.getRecordType(RD); 3370 } 3371 return TgtOffloadEntryQTy; 3372 } 3373 3374 namespace { 3375 struct PrivateHelpersTy { 3376 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3377 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3378 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3379 PrivateElemInit(PrivateElemInit) {} 3380 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3381 const Expr *OriginalRef = nullptr; 3382 const VarDecl *Original = nullptr; 3383 const VarDecl *PrivateCopy = nullptr; 3384 const VarDecl *PrivateElemInit = nullptr; 3385 bool isLocalPrivate() const { 3386 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3387 } 3388 }; 3389 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3390 } // anonymous namespace 3391 3392 static bool isAllocatableDecl(const VarDecl *VD) { 3393 const VarDecl *CVD = VD->getCanonicalDecl(); 3394 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3395 return false; 3396 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3397 // Use the default allocation. 
3398 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3399 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3400 !AA->getAllocator()); 3401 } 3402 3403 static RecordDecl * 3404 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3405 if (!Privates.empty()) { 3406 ASTContext &C = CGM.getContext(); 3407 // Build struct .kmp_privates_t. { 3408 // /* private vars */ 3409 // }; 3410 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3411 RD->startDefinition(); 3412 for (const auto &Pair : Privates) { 3413 const VarDecl *VD = Pair.second.Original; 3414 QualType Type = VD->getType().getNonReferenceType(); 3415 // If the private variable is a local variable with lvalue ref type, 3416 // allocate the pointer instead of the pointee type. 3417 if (Pair.second.isLocalPrivate()) { 3418 if (VD->getType()->isLValueReferenceType()) 3419 Type = C.getPointerType(Type); 3420 if (isAllocatableDecl(VD)) 3421 Type = C.getPointerType(Type); 3422 } 3423 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3424 if (VD->hasAttrs()) { 3425 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3426 E(VD->getAttrs().end()); 3427 I != E; ++I) 3428 FD->addAttr(*I); 3429 } 3430 } 3431 RD->completeDefinition(); 3432 return RD; 3433 } 3434 return nullptr; 3435 } 3436 3437 static RecordDecl * 3438 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3439 QualType KmpInt32Ty, 3440 QualType KmpRoutineEntryPointerQTy) { 3441 ASTContext &C = CGM.getContext(); 3442 // Build struct kmp_task_t { 3443 // void * shareds; 3444 // kmp_routine_entry_t routine; 3445 // kmp_int32 part_id; 3446 // kmp_cmplrdata_t data1; 3447 // kmp_cmplrdata_t data2; 3448 // For taskloops additional fields: 3449 // kmp_uint64 lb; 3450 // kmp_uint64 ub; 3451 // kmp_int64 st; 3452 // kmp_int32 liter; 3453 // void * reductions; 3454 // }; 3455 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3456 
UD->startDefinition(); 3457 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3458 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3459 UD->completeDefinition(); 3460 QualType KmpCmplrdataTy = C.getRecordType(UD); 3461 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3462 RD->startDefinition(); 3463 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3464 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3465 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3466 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3467 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3468 if (isOpenMPTaskLoopDirective(Kind)) { 3469 QualType KmpUInt64Ty = 3470 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3471 QualType KmpInt64Ty = 3472 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3473 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3474 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3475 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3476 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3477 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3478 } 3479 RD->completeDefinition(); 3480 return RD; 3481 } 3482 3483 static RecordDecl * 3484 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3485 ArrayRef<PrivateDataTy> Privates) { 3486 ASTContext &C = CGM.getContext(); 3487 // Build struct kmp_task_t_with_privates { 3488 // kmp_task_t task_data; 3489 // .kmp_privates_t. privates; 3490 // }; 3491 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3492 RD->startDefinition(); 3493 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3494 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3495 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3496 RD->completeDefinition(); 3497 return RD; 3498 } 3499 3500 /// Emit a proxy function which accepts kmp_task_t as the second 3501 /// argument. 
3502 /// \code 3503 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3504 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3505 /// For taskloops: 3506 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3507 /// tt->reductions, tt->shareds); 3508 /// return 0; 3509 /// } 3510 /// \endcode 3511 static llvm::Function * 3512 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3513 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3514 QualType KmpTaskTWithPrivatesPtrQTy, 3515 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3516 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3517 llvm::Value *TaskPrivatesMap) { 3518 ASTContext &C = CGM.getContext(); 3519 FunctionArgList Args; 3520 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3521 ImplicitParamDecl::Other); 3522 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3523 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3524 ImplicitParamDecl::Other); 3525 Args.push_back(&GtidArg); 3526 Args.push_back(&TaskTypeArg); 3527 const auto &TaskEntryFnInfo = 3528 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3529 llvm::FunctionType *TaskEntryTy = 3530 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3531 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3532 auto *TaskEntry = llvm::Function::Create( 3533 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3534 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3535 TaskEntry->setDoesNotRecurse(); 3536 CodeGenFunction CGF(CGM); 3537 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3538 Loc, Loc); 3539 3540 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3541 // tt, 3542 // For taskloops: 3543 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3544 // 
tt->task_data.shareds); 3545 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3546 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3547 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3548 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3549 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3550 const auto *KmpTaskTWithPrivatesQTyRD = 3551 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3552 LValue Base = 3553 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3554 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3555 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3556 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3557 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3558 3559 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3560 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3561 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3562 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3563 CGF.ConvertTypeForMem(SharedsPtrTy)); 3564 3565 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3566 llvm::Value *PrivatesParam; 3567 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3568 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3569 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3570 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3571 } else { 3572 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3573 } 3574 3575 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3576 TaskPrivatesMap, 3577 CGF.Builder 3578 .CreatePointerBitCastOrAddrSpaceCast( 3579 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3580 .getPointer()}; 3581 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3582 std::end(CommonArgs)); 3583 if (isOpenMPTaskLoopDirective(Kind)) { 3584 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3585 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3586 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3587 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3588 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3589 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3590 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3591 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3592 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3593 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3594 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3595 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3596 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3597 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3598 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3599 CallArgs.push_back(LBParam); 3600 CallArgs.push_back(UBParam); 3601 CallArgs.push_back(StParam); 3602 CallArgs.push_back(LIParam); 3603 CallArgs.push_back(RParam); 3604 } 3605 CallArgs.push_back(SharedsParam); 3606 3607 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3608 CallArgs); 3609 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3610 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3611 CGF.FinishFunction(); 3612 return TaskEntry; 3613 } 3614 3615 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3616 SourceLocation Loc, 3617 QualType KmpInt32Ty, 3618 QualType KmpTaskTWithPrivatesPtrQTy, 3619 QualType KmpTaskTWithPrivatesQTy) { 3620 ASTContext &C = CGM.getContext(); 3621 FunctionArgList Args; 3622 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3623 ImplicitParamDecl::Other); 3624 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3625 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3626 
ImplicitParamDecl::Other); 3627 Args.push_back(&GtidArg); 3628 Args.push_back(&TaskTypeArg); 3629 const auto &DestructorFnInfo = 3630 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3631 llvm::FunctionType *DestructorFnTy = 3632 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3633 std::string Name = 3634 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3635 auto *DestructorFn = 3636 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3637 Name, &CGM.getModule()); 3638 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3639 DestructorFnInfo); 3640 DestructorFn->setDoesNotRecurse(); 3641 CodeGenFunction CGF(CGM); 3642 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3643 Args, Loc, Loc); 3644 3645 LValue Base = CGF.EmitLoadOfPointerLValue( 3646 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3647 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3648 const auto *KmpTaskTWithPrivatesQTyRD = 3649 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3650 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3651 Base = CGF.EmitLValueForField(Base, *FI); 3652 for (const auto *Field : 3653 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3654 if (QualType::DestructionKind DtorKind = 3655 Field->getType().isDestructedType()) { 3656 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3657 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3658 } 3659 } 3660 CGF.FinishFunction(); 3661 return DestructorFn; 3662 } 3663 3664 /// Emit a privates mapping function for correct handling of private and 3665 /// firstprivate variables. 3666 /// \code 3667 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3668 /// **noalias priv1,..., <tyn> **noalias privn) { 3669 /// *priv1 = &.privates.priv1; 3670 /// ...; 3671 /// *privn = &.privates.privn; 3672 /// } 3673 /// \endcode 3674 static llvm::Value * 3675 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3676 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3677 ArrayRef<PrivateDataTy> Privates) { 3678 ASTContext &C = CGM.getContext(); 3679 FunctionArgList Args; 3680 ImplicitParamDecl TaskPrivatesArg( 3681 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3682 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3683 ImplicitParamDecl::Other); 3684 Args.push_back(&TaskPrivatesArg); 3685 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3686 unsigned Counter = 1; 3687 for (const Expr *E : Data.PrivateVars) { 3688 Args.push_back(ImplicitParamDecl::Create( 3689 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3690 C.getPointerType(C.getPointerType(E->getType())) 3691 .withConst() 3692 .withRestrict(), 3693 ImplicitParamDecl::Other)); 3694 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3695 PrivateVarsPos[VD] = Counter; 3696 ++Counter; 3697 } 3698 for (const Expr *E : Data.FirstprivateVars) { 3699 Args.push_back(ImplicitParamDecl::Create( 3700 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3701 C.getPointerType(C.getPointerType(E->getType())) 3702 .withConst() 3703 .withRestrict(), 3704 ImplicitParamDecl::Other)); 3705 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3706 PrivateVarsPos[VD] = Counter; 3707 ++Counter; 3708 } 3709 for (const Expr *E : Data.LastprivateVars) { 3710 Args.push_back(ImplicitParamDecl::Create( 3711 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3712 C.getPointerType(C.getPointerType(E->getType())) 3713 .withConst() 3714 .withRestrict(), 3715 ImplicitParamDecl::Other)); 3716 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3717 PrivateVarsPos[VD] = Counter; 3718 ++Counter; 3719 } 3720 for (const VarDecl *VD : 
Data.PrivateLocals) { 3721 QualType Ty = VD->getType().getNonReferenceType(); 3722 if (VD->getType()->isLValueReferenceType()) 3723 Ty = C.getPointerType(Ty); 3724 if (isAllocatableDecl(VD)) 3725 Ty = C.getPointerType(Ty); 3726 Args.push_back(ImplicitParamDecl::Create( 3727 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3728 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3729 ImplicitParamDecl::Other)); 3730 PrivateVarsPos[VD] = Counter; 3731 ++Counter; 3732 } 3733 const auto &TaskPrivatesMapFnInfo = 3734 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3735 llvm::FunctionType *TaskPrivatesMapTy = 3736 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3737 std::string Name = 3738 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3739 auto *TaskPrivatesMap = llvm::Function::Create( 3740 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3741 &CGM.getModule()); 3742 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3743 TaskPrivatesMapFnInfo); 3744 if (CGM.getLangOpts().Optimize) { 3745 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3746 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3747 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3748 } 3749 CodeGenFunction CGF(CGM); 3750 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3751 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3752 3753 // *privi = &.privates.privi; 3754 LValue Base = CGF.EmitLoadOfPointerLValue( 3755 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3756 TaskPrivatesArg.getType()->castAs<PointerType>()); 3757 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3758 Counter = 0; 3759 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3760 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3761 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3762 LValue RefLVal = 3763 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3764 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3765 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3766 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3767 ++Counter; 3768 } 3769 CGF.FinishFunction(); 3770 return TaskPrivatesMap; 3771 } 3772 3773 /// Emit initialization for private variables in task-based directives. 3774 static void emitPrivatesInit(CodeGenFunction &CGF, 3775 const OMPExecutableDirective &D, 3776 Address KmpTaskSharedsPtr, LValue TDBase, 3777 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3778 QualType SharedsTy, QualType SharedsPtrTy, 3779 const OMPTaskDataTy &Data, 3780 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3781 ASTContext &C = CGF.getContext(); 3782 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3783 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3784 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3785 ? OMPD_taskloop 3786 : OMPD_task; 3787 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3788 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3789 LValue SrcBase; 3790 bool IsTargetTask = 3791 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3792 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3793 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3794 // PointersArray, SizesArray, and MappersArray. The original variables for 3795 // these arrays are not captured and we get their addresses explicitly. 3796 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3797 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3798 SrcBase = CGF.MakeAddrLValue( 3799 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3800 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3801 SharedsTy); 3802 } 3803 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3804 for (const PrivateDataTy &Pair : Privates) { 3805 // Do not initialize private locals. 
3806 if (Pair.second.isLocalPrivate()) { 3807 ++FI; 3808 continue; 3809 } 3810 const VarDecl *VD = Pair.second.PrivateCopy; 3811 const Expr *Init = VD->getAnyInitializer(); 3812 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3813 !CGF.isTrivialInitializer(Init)))) { 3814 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3815 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3816 const VarDecl *OriginalVD = Pair.second.Original; 3817 // Check if the variable is the target-based BasePointersArray, 3818 // PointersArray, SizesArray, or MappersArray. 3819 LValue SharedRefLValue; 3820 QualType Type = PrivateLValue.getType(); 3821 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3822 if (IsTargetTask && !SharedField) { 3823 assert(isa<ImplicitParamDecl>(OriginalVD) && 3824 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3825 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3826 ->getNumParams() == 0 && 3827 isa<TranslationUnitDecl>( 3828 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3829 ->getDeclContext()) && 3830 "Expected artificial target data variable."); 3831 SharedRefLValue = 3832 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3833 } else if (ForDup) { 3834 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3835 SharedRefLValue = CGF.MakeAddrLValue( 3836 Address(SharedRefLValue.getPointer(CGF), 3837 C.getDeclAlign(OriginalVD)), 3838 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3839 SharedRefLValue.getTBAAInfo()); 3840 } else if (CGF.LambdaCaptureFields.count( 3841 Pair.second.Original->getCanonicalDecl()) > 0 || 3842 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3843 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3844 } else { 3845 // Processing for implicitly captured variables. 
3846 InlinedOpenMPRegionRAII Region( 3847 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3848 /*HasCancel=*/false); 3849 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3850 } 3851 if (Type->isArrayType()) { 3852 // Initialize firstprivate array. 3853 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3854 // Perform simple memcpy. 3855 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3856 } else { 3857 // Initialize firstprivate array using element-by-element 3858 // initialization. 3859 CGF.EmitOMPAggregateAssign( 3860 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3861 Type, 3862 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3863 Address SrcElement) { 3864 // Clean up any temporaries needed by the initialization. 3865 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3866 InitScope.addPrivate( 3867 Elem, [SrcElement]() -> Address { return SrcElement; }); 3868 (void)InitScope.Privatize(); 3869 // Emit initialization for single element. 3870 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3871 CGF, &CapturesInfo); 3872 CGF.EmitAnyExprToMem(Init, DestElement, 3873 Init->getType().getQualifiers(), 3874 /*IsInitializer=*/false); 3875 }); 3876 } 3877 } else { 3878 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3879 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3880 return SharedRefLValue.getAddress(CGF); 3881 }); 3882 (void)InitScope.Privatize(); 3883 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3884 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3885 /*capturedByInit=*/false); 3886 } 3887 } else { 3888 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3889 } 3890 } 3891 ++FI; 3892 } 3893 } 3894 3895 /// Check if duplication function is required for taskloops. 
3896 static bool checkInitIsRequired(CodeGenFunction &CGF, 3897 ArrayRef<PrivateDataTy> Privates) { 3898 bool InitRequired = false; 3899 for (const PrivateDataTy &Pair : Privates) { 3900 if (Pair.second.isLocalPrivate()) 3901 continue; 3902 const VarDecl *VD = Pair.second.PrivateCopy; 3903 const Expr *Init = VD->getAnyInitializer(); 3904 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3905 !CGF.isTrivialInitializer(Init)); 3906 if (InitRequired) 3907 break; 3908 } 3909 return InitRequired; 3910 } 3911 3912 3913 /// Emit task_dup function (for initialization of 3914 /// private/firstprivate/lastprivate vars and last_iter flag) 3915 /// \code 3916 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3917 /// lastpriv) { 3918 /// // setup lastprivate flag 3919 /// task_dst->last = lastpriv; 3920 /// // could be constructor calls here... 3921 /// } 3922 /// \endcode 3923 static llvm::Value * 3924 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3925 const OMPExecutableDirective &D, 3926 QualType KmpTaskTWithPrivatesPtrQTy, 3927 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3928 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3929 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3930 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3931 ASTContext &C = CGM.getContext(); 3932 FunctionArgList Args; 3933 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3934 KmpTaskTWithPrivatesPtrQTy, 3935 ImplicitParamDecl::Other); 3936 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3937 KmpTaskTWithPrivatesPtrQTy, 3938 ImplicitParamDecl::Other); 3939 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3940 ImplicitParamDecl::Other); 3941 Args.push_back(&DstArg); 3942 Args.push_back(&SrcArg); 3943 Args.push_back(&LastprivArg); 3944 const auto &TaskDupFnInfo = 3945 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3946 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3947 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3948 auto *TaskDup = llvm::Function::Create( 3949 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3950 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3951 TaskDup->setDoesNotRecurse(); 3952 CodeGenFunction CGF(CGM); 3953 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3954 Loc); 3955 3956 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3957 CGF.GetAddrOfLocalVar(&DstArg), 3958 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3959 // task_dst->liter = lastpriv; 3960 if (WithLastIter) { 3961 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3962 LValue Base = CGF.EmitLValueForField( 3963 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3964 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3965 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3966 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3967 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3968 } 3969 3970 // Emit initial values for private copies (if any). 
3971 assert(!Privates.empty()); 3972 Address KmpTaskSharedsPtr = Address::invalid(); 3973 if (!Data.FirstprivateVars.empty()) { 3974 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3975 CGF.GetAddrOfLocalVar(&SrcArg), 3976 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3977 LValue Base = CGF.EmitLValueForField( 3978 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3979 KmpTaskSharedsPtr = Address( 3980 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3981 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3982 KmpTaskTShareds)), 3983 Loc), 3984 CGM.getNaturalTypeAlignment(SharedsTy)); 3985 } 3986 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3987 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3988 CGF.FinishFunction(); 3989 return TaskDup; 3990 } 3991 3992 /// Checks if destructor function is required to be generated. 3993 /// \return true if cleanups are required, false otherwise. 3994 static bool 3995 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3996 ArrayRef<PrivateDataTy> Privates) { 3997 for (const PrivateDataTy &P : Privates) { 3998 if (P.second.isLocalPrivate()) 3999 continue; 4000 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4001 if (Ty.isDestructedType()) 4002 return true; 4003 } 4004 return false; 4005 } 4006 4007 namespace { 4008 /// Loop generator for OpenMP iterator expression. 
4009 class OMPIteratorGeneratorScope final 4010 : public CodeGenFunction::OMPPrivateScope { 4011 CodeGenFunction &CGF; 4012 const OMPIteratorExpr *E = nullptr; 4013 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4014 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4015 OMPIteratorGeneratorScope() = delete; 4016 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4017 4018 public: 4019 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4020 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4021 if (!E) 4022 return; 4023 SmallVector<llvm::Value *, 4> Uppers; 4024 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4025 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4026 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4027 addPrivate(VD, [&CGF, VD]() { 4028 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4029 }); 4030 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4031 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4032 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4033 "counter.addr"); 4034 }); 4035 } 4036 Privatize(); 4037 4038 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4039 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4040 LValue CLVal = 4041 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4042 HelperData.CounterVD->getType()); 4043 // Counter = 0; 4044 CGF.EmitStoreOfScalar( 4045 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4046 CLVal); 4047 CodeGenFunction::JumpDest &ContDest = 4048 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4049 CodeGenFunction::JumpDest &ExitDest = 4050 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4051 // N = <number-of_iterations>; 4052 llvm::Value *N = Uppers[I]; 4053 // cont: 4054 // if (Counter < N) goto body; else goto exit; 4055 CGF.EmitBlock(ContDest.getBlock()); 4056 auto *CVal = 4057 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4058 llvm::Value *Cmp = 4059 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4060 ? CGF.Builder.CreateICmpSLT(CVal, N) 4061 : CGF.Builder.CreateICmpULT(CVal, N); 4062 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4063 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4064 // body: 4065 CGF.EmitBlock(BodyBB); 4066 // Iteri = Begini + Counter * Stepi; 4067 CGF.EmitIgnoredExpr(HelperData.Update); 4068 } 4069 } 4070 ~OMPIteratorGeneratorScope() { 4071 if (!E) 4072 return; 4073 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4074 // Counter = Counter + 1; 4075 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4076 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4077 // goto cont; 4078 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4079 // exit: 4080 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4081 } 4082 } 4083 }; 4084 } // namespace 4085 4086 static std::pair<llvm::Value *, llvm::Value *> 4087 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4088 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4089 llvm::Value *Addr; 4090 if (OASE) { 4091 const Expr *Base = OASE->getBase(); 4092 Addr = CGF.EmitScalarExpr(Base); 4093 } else { 4094 Addr = CGF.EmitLValue(E).getPointer(CGF); 4095 } 4096 llvm::Value *SizeVal; 4097 QualType Ty = E->getType(); 4098 if (OASE) { 4099 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4100 for (const Expr *SE : OASE->getDimensions()) { 4101 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4102 Sz = CGF.EmitScalarConversion( 4103 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4104 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4105 } 4106 } else if (const auto *ASE = 4107 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4108 LValue UpAddrLVal = 4109 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4110 llvm::Value *UpAddr = 
4111 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4112 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4113 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4114 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4115 } else { 4116 SizeVal = CGF.getTypeSize(Ty); 4117 } 4118 return std::make_pair(Addr, SizeVal); 4119 } 4120 4121 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4122 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4123 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4124 if (KmpTaskAffinityInfoTy.isNull()) { 4125 RecordDecl *KmpAffinityInfoRD = 4126 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4127 KmpAffinityInfoRD->startDefinition(); 4128 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4129 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4130 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4131 KmpAffinityInfoRD->completeDefinition(); 4132 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4133 } 4134 } 4135 4136 CGOpenMPRuntime::TaskResultTy 4137 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4138 const OMPExecutableDirective &D, 4139 llvm::Function *TaskFunction, QualType SharedsTy, 4140 Address Shareds, const OMPTaskDataTy &Data) { 4141 ASTContext &C = CGM.getContext(); 4142 llvm::SmallVector<PrivateDataTy, 4> Privates; 4143 // Aggregate privates and sort them by the alignment. 
4144 const auto *I = Data.PrivateCopies.begin(); 4145 for (const Expr *E : Data.PrivateVars) { 4146 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4147 Privates.emplace_back( 4148 C.getDeclAlign(VD), 4149 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4150 /*PrivateElemInit=*/nullptr)); 4151 ++I; 4152 } 4153 I = Data.FirstprivateCopies.begin(); 4154 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4155 for (const Expr *E : Data.FirstprivateVars) { 4156 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4157 Privates.emplace_back( 4158 C.getDeclAlign(VD), 4159 PrivateHelpersTy( 4160 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4161 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4162 ++I; 4163 ++IElemInitRef; 4164 } 4165 I = Data.LastprivateCopies.begin(); 4166 for (const Expr *E : Data.LastprivateVars) { 4167 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4168 Privates.emplace_back( 4169 C.getDeclAlign(VD), 4170 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4171 /*PrivateElemInit=*/nullptr)); 4172 ++I; 4173 } 4174 for (const VarDecl *VD : Data.PrivateLocals) { 4175 if (isAllocatableDecl(VD)) 4176 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4177 else 4178 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4179 } 4180 llvm::stable_sort(Privates, 4181 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4182 return L.first > R.first; 4183 }); 4184 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4185 // Build type kmp_routine_entry_t (if not built yet). 4186 emitKmpRoutineEntryT(KmpInt32Ty); 4187 // Build type kmp_task_t (if not built yet). 
4188 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4189 if (SavedKmpTaskloopTQTy.isNull()) { 4190 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4191 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4192 } 4193 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4194 } else { 4195 assert((D.getDirectiveKind() == OMPD_task || 4196 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4197 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4198 "Expected taskloop, task or target directive"); 4199 if (SavedKmpTaskTQTy.isNull()) { 4200 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4201 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4202 } 4203 KmpTaskTQTy = SavedKmpTaskTQTy; 4204 } 4205 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4206 // Build particular struct kmp_task_t for the given task. 4207 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4208 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4209 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4210 QualType KmpTaskTWithPrivatesPtrQTy = 4211 C.getPointerType(KmpTaskTWithPrivatesQTy); 4212 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4213 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4214 KmpTaskTWithPrivatesTy->getPointerTo(); 4215 llvm::Value *KmpTaskTWithPrivatesTySize = 4216 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4217 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4218 4219 // Emit initial values for private copies (if any). 
4220 llvm::Value *TaskPrivatesMap = nullptr; 4221 llvm::Type *TaskPrivatesMapTy = 4222 std::next(TaskFunction->arg_begin(), 3)->getType(); 4223 if (!Privates.empty()) { 4224 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4225 TaskPrivatesMap = 4226 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4227 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4228 TaskPrivatesMap, TaskPrivatesMapTy); 4229 } else { 4230 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4231 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4232 } 4233 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4234 // kmp_task_t *tt); 4235 llvm::Function *TaskEntry = emitProxyTaskFunction( 4236 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4237 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4238 TaskPrivatesMap); 4239 4240 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4241 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4242 // kmp_routine_entry_t *task_entry); 4243 // Task flags. Format is taken from 4244 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4245 // description of kmp_tasking_flags struct. 4246 enum { 4247 TiedFlag = 0x1, 4248 FinalFlag = 0x2, 4249 DestructorsFlag = 0x8, 4250 PriorityFlag = 0x20, 4251 DetachableFlag = 0x40, 4252 }; 4253 unsigned Flags = Data.Tied ? TiedFlag : 0; 4254 bool NeedsCleanup = false; 4255 if (!Privates.empty()) { 4256 NeedsCleanup = 4257 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4258 if (NeedsCleanup) 4259 Flags = Flags | DestructorsFlag; 4260 } 4261 if (Data.Priority.getInt()) 4262 Flags = Flags | PriorityFlag; 4263 if (D.hasClausesOfKind<OMPDetachClause>()) 4264 Flags = Flags | DetachableFlag; 4265 llvm::Value *TaskFlags = 4266 Data.Final.getPointer() 4267 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4268 CGF.Builder.getInt32(FinalFlag), 4269 CGF.Builder.getInt32(/*C=*/0)) 4270 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4271 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4272 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4273 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4274 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4275 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4276 TaskEntry, KmpRoutineEntryPtrTy)}; 4277 llvm::Value *NewTask; 4278 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4279 // Check if we have any device clause associated with the directive. 4280 const Expr *Device = nullptr; 4281 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4282 Device = C->getDevice(); 4283 // Emit device ID if any otherwise use default value. 4284 llvm::Value *DeviceID; 4285 if (Device) 4286 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4287 CGF.Int64Ty, /*isSigned=*/true); 4288 else 4289 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4290 AllocArgs.push_back(DeviceID); 4291 NewTask = CGF.EmitRuntimeCall( 4292 OMPBuilder.getOrCreateRuntimeFunction( 4293 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4294 AllocArgs); 4295 } else { 4296 NewTask = 4297 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4298 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4299 AllocArgs); 4300 } 4301 // Emit detach clause initialization. 
4302 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4303 // task_descriptor); 4304 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4305 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4306 LValue EvtLVal = CGF.EmitLValue(Evt); 4307 4308 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4309 // int gtid, kmp_task_t *task); 4310 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4311 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4312 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4313 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4314 OMPBuilder.getOrCreateRuntimeFunction( 4315 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4316 {Loc, Tid, NewTask}); 4317 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4318 Evt->getExprLoc()); 4319 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4320 } 4321 // Process affinity clauses. 4322 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4323 // Process list of affinity data. 4324 ASTContext &C = CGM.getContext(); 4325 Address AffinitiesArray = Address::invalid(); 4326 // Calculate number of elements to form the array of affinity data. 4327 llvm::Value *NumOfElements = nullptr; 4328 unsigned NumAffinities = 0; 4329 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4330 if (const Expr *Modifier = C->getModifier()) { 4331 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4332 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4333 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4334 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4335 NumOfElements = 4336 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4337 } 4338 } else { 4339 NumAffinities += C->varlist_size(); 4340 } 4341 } 4342 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4343 // Fields ids in kmp_task_affinity_info record. 
4344 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4345 4346 QualType KmpTaskAffinityInfoArrayTy; 4347 if (NumOfElements) { 4348 NumOfElements = CGF.Builder.CreateNUWAdd( 4349 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4350 OpaqueValueExpr OVE( 4351 Loc, 4352 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4353 VK_RValue); 4354 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4355 RValue::get(NumOfElements)); 4356 KmpTaskAffinityInfoArrayTy = 4357 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4358 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4359 // Properly emit variable-sized array. 4360 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4361 ImplicitParamDecl::Other); 4362 CGF.EmitVarDecl(*PD); 4363 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4364 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4365 /*isSigned=*/false); 4366 } else { 4367 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4368 KmpTaskAffinityInfoTy, 4369 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4370 ArrayType::Normal, /*IndexTypeQuals=*/0); 4371 AffinitiesArray = 4372 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4373 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4374 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4375 /*isSigned=*/false); 4376 } 4377 4378 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4379 // Fill array by elements without iterators. 
4380 unsigned Pos = 0; 4381 bool HasIterator = false; 4382 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4383 if (C->getModifier()) { 4384 HasIterator = true; 4385 continue; 4386 } 4387 for (const Expr *E : C->varlists()) { 4388 llvm::Value *Addr; 4389 llvm::Value *Size; 4390 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4391 LValue Base = 4392 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4393 KmpTaskAffinityInfoTy); 4394 // affs[i].base_addr = &<Affinities[i].second>; 4395 LValue BaseAddrLVal = CGF.EmitLValueForField( 4396 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4397 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4398 BaseAddrLVal); 4399 // affs[i].len = sizeof(<Affinities[i].second>); 4400 LValue LenLVal = CGF.EmitLValueForField( 4401 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4402 CGF.EmitStoreOfScalar(Size, LenLVal); 4403 ++Pos; 4404 } 4405 } 4406 LValue PosLVal; 4407 if (HasIterator) { 4408 PosLVal = CGF.MakeAddrLValue( 4409 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4410 C.getSizeType()); 4411 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4412 } 4413 // Process elements with iterators. 
4414 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4415 const Expr *Modifier = C->getModifier(); 4416 if (!Modifier) 4417 continue; 4418 OMPIteratorGeneratorScope IteratorScope( 4419 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4420 for (const Expr *E : C->varlists()) { 4421 llvm::Value *Addr; 4422 llvm::Value *Size; 4423 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4424 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4425 LValue Base = CGF.MakeAddrLValue( 4426 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4427 AffinitiesArray.getAlignment()), 4428 KmpTaskAffinityInfoTy); 4429 // affs[i].base_addr = &<Affinities[i].second>; 4430 LValue BaseAddrLVal = CGF.EmitLValueForField( 4431 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4432 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4433 BaseAddrLVal); 4434 // affs[i].len = sizeof(<Affinities[i].second>); 4435 LValue LenLVal = CGF.EmitLValueForField( 4436 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4437 CGF.EmitStoreOfScalar(Size, LenLVal); 4438 Idx = CGF.Builder.CreateNUWAdd( 4439 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4440 CGF.EmitStoreOfScalar(Idx, PosLVal); 4441 } 4442 } 4443 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4444 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4445 // naffins, kmp_task_affinity_info_t *affin_list); 4446 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4447 llvm::Value *GTid = getThreadID(CGF, Loc); 4448 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4449 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4450 // FIXME: Emit the function and ignore its result for now unless the 4451 // runtime function is properly implemented. 
4452 (void)CGF.EmitRuntimeCall( 4453 OMPBuilder.getOrCreateRuntimeFunction( 4454 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4455 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4456 } 4457 llvm::Value *NewTaskNewTaskTTy = 4458 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4459 NewTask, KmpTaskTWithPrivatesPtrTy); 4460 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4461 KmpTaskTWithPrivatesQTy); 4462 LValue TDBase = 4463 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4464 // Fill the data in the resulting kmp_task_t record. 4465 // Copy shareds if there are any. 4466 Address KmpTaskSharedsPtr = Address::invalid(); 4467 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4468 KmpTaskSharedsPtr = 4469 Address(CGF.EmitLoadOfScalar( 4470 CGF.EmitLValueForField( 4471 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4472 KmpTaskTShareds)), 4473 Loc), 4474 CGM.getNaturalTypeAlignment(SharedsTy)); 4475 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4476 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4477 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4478 } 4479 // Emit initial values for private copies (if any). 4480 TaskResultTy Result; 4481 if (!Privates.empty()) { 4482 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4483 SharedsTy, SharedsPtrTy, Data, Privates, 4484 /*ForDup=*/false); 4485 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4486 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4487 Result.TaskDupFn = emitTaskDupFunction( 4488 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4489 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4490 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4491 } 4492 } 4493 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4494 enum { Priority = 0, Destructors = 1 }; 4495 // Provide pointer to function with destructors for privates. 4496 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4497 const RecordDecl *KmpCmplrdataUD = 4498 (*FI)->getType()->getAsUnionType()->getDecl(); 4499 if (NeedsCleanup) { 4500 llvm::Value *DestructorFn = emitDestructorsFunction( 4501 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4502 KmpTaskTWithPrivatesQTy); 4503 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4504 LValue DestructorsLV = CGF.EmitLValueForField( 4505 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4506 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4507 DestructorFn, KmpRoutineEntryPtrTy), 4508 DestructorsLV); 4509 } 4510 // Set priority. 4511 if (Data.Priority.getInt()) { 4512 LValue Data2LV = CGF.EmitLValueForField( 4513 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4514 LValue PriorityLV = CGF.EmitLValueForField( 4515 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4516 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4517 } 4518 Result.NewTask = NewTask; 4519 Result.TaskEntry = TaskEntry; 4520 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4521 Result.TDBase = TDBase; 4522 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4523 return Result; 4524 } 4525 4526 namespace { 4527 /// Dependence kind for RTL. 4528 enum RTLDependenceKindTy { 4529 DepIn = 0x01, 4530 DepInOut = 0x3, 4531 DepMutexInOutSet = 0x4 4532 }; 4533 /// Fields ids in kmp_depend_info record. 4534 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4535 } // namespace 4536 4537 /// Translates internal dependency kind into the runtime kind. 4538 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4539 RTLDependenceKindTy DepKind; 4540 switch (K) { 4541 case OMPC_DEPEND_in: 4542 DepKind = DepIn; 4543 break; 4544 // Out and InOut dependencies must use the same code. 
4545 case OMPC_DEPEND_out: 4546 case OMPC_DEPEND_inout: 4547 DepKind = DepInOut; 4548 break; 4549 case OMPC_DEPEND_mutexinoutset: 4550 DepKind = DepMutexInOutSet; 4551 break; 4552 case OMPC_DEPEND_source: 4553 case OMPC_DEPEND_sink: 4554 case OMPC_DEPEND_depobj: 4555 case OMPC_DEPEND_unknown: 4556 llvm_unreachable("Unknown task dependence type"); 4557 } 4558 return DepKind; 4559 } 4560 4561 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4562 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4563 QualType &FlagsTy) { 4564 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4565 if (KmpDependInfoTy.isNull()) { 4566 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4567 KmpDependInfoRD->startDefinition(); 4568 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4569 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4570 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4571 KmpDependInfoRD->completeDefinition(); 4572 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4573 } 4574 } 4575 4576 std::pair<llvm::Value *, LValue> 4577 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4578 SourceLocation Loc) { 4579 ASTContext &C = CGM.getContext(); 4580 QualType FlagsTy; 4581 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4582 RecordDecl *KmpDependInfoRD = 4583 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4584 LValue Base = CGF.EmitLoadOfPointerLValue( 4585 DepobjLVal.getAddress(CGF), 4586 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4587 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4588 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4589 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4590 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4591 Base.getTBAAInfo()); 4592 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4593 Addr.getPointer(), 4594 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4595 LValue NumDepsBase = CGF.MakeAddrLValue( 4596 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4597 Base.getBaseInfo(), Base.getTBAAInfo()); 4598 // NumDeps = deps[i].base_addr; 4599 LValue BaseAddrLVal = CGF.EmitLValueForField( 4600 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4601 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4602 return std::make_pair(NumDeps, Base); 4603 } 4604 4605 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4606 llvm::PointerUnion<unsigned *, LValue *> Pos, 4607 const OMPTaskDataTy::DependData &Data, 4608 Address DependenciesArray) { 4609 CodeGenModule &CGM = CGF.CGM; 4610 ASTContext &C = CGM.getContext(); 4611 QualType FlagsTy; 4612 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4613 RecordDecl *KmpDependInfoRD = 4614 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4615 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4616 4617 OMPIteratorGeneratorScope IteratorScope( 4618 CGF, cast_or_null<OMPIteratorExpr>( 4619 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4620 : nullptr)); 4621 for (const Expr *E : Data.DepExprs) { 4622 llvm::Value *Addr; 4623 llvm::Value *Size; 4624 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4625 LValue Base; 4626 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4627 Base = CGF.MakeAddrLValue( 4628 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4629 } else { 4630 LValue &PosLVal = *Pos.get<LValue *>(); 4631 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4632 Base = CGF.MakeAddrLValue( 4633 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4634 DependenciesArray.getAlignment()), 4635 KmpDependInfoTy); 4636 } 4637 // deps[i].base_addr = &<Dependencies[i].second>; 4638 LValue BaseAddrLVal = CGF.EmitLValueForField( 4639 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4640 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4641 BaseAddrLVal); 4642 // deps[i].len = sizeof(<Dependencies[i].second>); 4643 LValue LenLVal = CGF.EmitLValueForField( 4644 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4645 CGF.EmitStoreOfScalar(Size, LenLVal); 4646 // deps[i].flags = <Dependencies[i].first>; 4647 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4648 LValue FlagsLVal = CGF.EmitLValueForField( 4649 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4650 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4651 FlagsLVal); 4652 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4653 ++(*P); 4654 } else { 4655 LValue &PosLVal = *Pos.get<LValue *>(); 4656 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4657 Idx = CGF.Builder.CreateNUWAdd(Idx, 4658 llvm::ConstantInt::get(Idx->getType(), 1)); 4659 CGF.EmitStoreOfScalar(Idx, PosLVal); 4660 } 4661 } 4662 } 4663 4664 static SmallVector<llvm::Value *, 4> 4665 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4666 const OMPTaskDataTy::DependData 
&Data) { 4667 assert(Data.DepKind == OMPC_DEPEND_depobj && 4668 "Expected depobj dependecy kind."); 4669 SmallVector<llvm::Value *, 4> Sizes; 4670 SmallVector<LValue, 4> SizeLVals; 4671 ASTContext &C = CGF.getContext(); 4672 QualType FlagsTy; 4673 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4674 RecordDecl *KmpDependInfoRD = 4675 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4676 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4677 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4678 { 4679 OMPIteratorGeneratorScope IteratorScope( 4680 CGF, cast_or_null<OMPIteratorExpr>( 4681 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4682 : nullptr)); 4683 for (const Expr *E : Data.DepExprs) { 4684 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4685 LValue Base = CGF.EmitLoadOfPointerLValue( 4686 DepobjLVal.getAddress(CGF), 4687 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4688 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4689 Base.getAddress(CGF), KmpDependInfoPtrT); 4690 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4691 Base.getTBAAInfo()); 4692 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4693 Addr.getPointer(), 4694 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4695 LValue NumDepsBase = CGF.MakeAddrLValue( 4696 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4697 Base.getBaseInfo(), Base.getTBAAInfo()); 4698 // NumDeps = deps[i].base_addr; 4699 LValue BaseAddrLVal = CGF.EmitLValueForField( 4700 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4701 llvm::Value *NumDeps = 4702 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4703 LValue NumLVal = CGF.MakeAddrLValue( 4704 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4705 C.getUIntPtrType()); 4706 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4707 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4708 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4709 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4710 CGF.EmitStoreOfScalar(Add, NumLVal); 4711 SizeLVals.push_back(NumLVal); 4712 } 4713 } 4714 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4715 llvm::Value *Size = 4716 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4717 Sizes.push_back(Size); 4718 } 4719 return Sizes; 4720 } 4721 4722 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4723 LValue PosLVal, 4724 const OMPTaskDataTy::DependData &Data, 4725 Address DependenciesArray) { 4726 assert(Data.DepKind == OMPC_DEPEND_depobj && 4727 "Expected depobj dependecy kind."); 4728 ASTContext &C = CGF.getContext(); 4729 QualType FlagsTy; 4730 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4731 RecordDecl *KmpDependInfoRD = 4732 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4733 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4734 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4735 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4736 { 4737 OMPIteratorGeneratorScope IteratorScope( 4738 CGF, cast_or_null<OMPIteratorExpr>( 4739 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4740 : nullptr)); 4741 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4742 const Expr *E = Data.DepExprs[I]; 4743 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4744 LValue Base = CGF.EmitLoadOfPointerLValue( 4745 DepobjLVal.getAddress(CGF), 4746 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4747 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4748 Base.getAddress(CGF), KmpDependInfoPtrT); 4749 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4750 Base.getTBAAInfo()); 4751 4752 // Get number of elements in a single depobj. 
4753 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4754 Addr.getPointer(), 4755 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4756 LValue NumDepsBase = CGF.MakeAddrLValue( 4757 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4758 Base.getBaseInfo(), Base.getTBAAInfo()); 4759 // NumDeps = deps[i].base_addr; 4760 LValue BaseAddrLVal = CGF.EmitLValueForField( 4761 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4762 llvm::Value *NumDeps = 4763 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4764 4765 // memcopy dependency data. 4766 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4767 ElSize, 4768 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4769 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4770 Address DepAddr = 4771 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4772 DependenciesArray.getAlignment()); 4773 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4774 4775 // Increase pos. 4776 // pos += size; 4777 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4778 CGF.EmitStoreOfScalar(Add, PosLVal); 4779 } 4780 } 4781 } 4782 4783 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4784 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4785 SourceLocation Loc) { 4786 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4787 return D.DepExprs.empty(); 4788 })) 4789 return std::make_pair(nullptr, Address::invalid()); 4790 // Process list of dependencies. 4791 ASTContext &C = CGM.getContext(); 4792 Address DependenciesArray = Address::invalid(); 4793 llvm::Value *NumOfElements = nullptr; 4794 unsigned NumDependencies = std::accumulate( 4795 Dependencies.begin(), Dependencies.end(), 0, 4796 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4797 return D.DepKind == OMPC_DEPEND_depobj 4798 ? V 4799 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4800 }); 4801 QualType FlagsTy; 4802 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4803 bool HasDepobjDeps = false; 4804 bool HasRegularWithIterators = false; 4805 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4806 llvm::Value *NumOfRegularWithIterators = 4807 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4808 // Calculate number of depobj dependecies and regular deps with the iterators. 4809 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4810 if (D.DepKind == OMPC_DEPEND_depobj) { 4811 SmallVector<llvm::Value *, 4> Sizes = 4812 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4813 for (llvm::Value *Size : Sizes) { 4814 NumOfDepobjElements = 4815 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4816 } 4817 HasDepobjDeps = true; 4818 continue; 4819 } 4820 // Include number of iterations, if any. 4821 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4822 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4823 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4824 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4825 NumOfRegularWithIterators = 4826 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4827 } 4828 HasRegularWithIterators = true; 4829 continue; 4830 } 4831 } 4832 4833 QualType KmpDependInfoArrayTy; 4834 if (HasDepobjDeps || HasRegularWithIterators) { 4835 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4836 /*isSigned=*/false); 4837 if (HasDepobjDeps) { 4838 NumOfElements = 4839 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4840 } 4841 if (HasRegularWithIterators) { 4842 NumOfElements = 4843 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4844 } 4845 OpaqueValueExpr OVE(Loc, 4846 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4847 VK_RValue); 4848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4849 RValue::get(NumOfElements)); 4850 
KmpDependInfoArrayTy = 4851 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4852 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4853 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4854 // Properly emit variable-sized array. 4855 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4856 ImplicitParamDecl::Other); 4857 CGF.EmitVarDecl(*PD); 4858 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4859 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4860 /*isSigned=*/false); 4861 } else { 4862 KmpDependInfoArrayTy = C.getConstantArrayType( 4863 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4864 ArrayType::Normal, /*IndexTypeQuals=*/0); 4865 DependenciesArray = 4866 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4867 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4868 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4869 /*isSigned=*/false); 4870 } 4871 unsigned Pos = 0; 4872 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4873 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4874 Dependencies[I].IteratorExpr) 4875 continue; 4876 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4877 DependenciesArray); 4878 } 4879 // Copy regular dependecies with iterators. 4880 LValue PosLVal = CGF.MakeAddrLValue( 4881 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4882 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4883 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4884 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4885 !Dependencies[I].IteratorExpr) 4886 continue; 4887 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4888 DependenciesArray); 4889 } 4890 // Copy final depobj arrays without iterators. 
4891 if (HasDepobjDeps) { 4892 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4893 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4894 continue; 4895 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4896 DependenciesArray); 4897 } 4898 } 4899 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4900 DependenciesArray, CGF.VoidPtrTy); 4901 return std::make_pair(NumOfElements, DependenciesArray); 4902 } 4903 4904 Address CGOpenMPRuntime::emitDepobjDependClause( 4905 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4906 SourceLocation Loc) { 4907 if (Dependencies.DepExprs.empty()) 4908 return Address::invalid(); 4909 // Process list of dependencies. 4910 ASTContext &C = CGM.getContext(); 4911 Address DependenciesArray = Address::invalid(); 4912 unsigned NumDependencies = Dependencies.DepExprs.size(); 4913 QualType FlagsTy; 4914 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4915 RecordDecl *KmpDependInfoRD = 4916 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4917 4918 llvm::Value *Size; 4919 // Define type kmp_depend_info[<Dependencies.size()>]; 4920 // For depobj reserve one extra element to store the number of elements. 4921 // It is required to handle depobj(x) update(in) construct. 
4922 // kmp_depend_info[<Dependencies.size()>] deps; 4923 llvm::Value *NumDepsVal; 4924 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4925 if (const auto *IE = 4926 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4927 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4928 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4929 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4930 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4931 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4932 } 4933 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4934 NumDepsVal); 4935 CharUnits SizeInBytes = 4936 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4937 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4938 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4939 NumDepsVal = 4940 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4941 } else { 4942 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4943 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4944 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4945 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4946 Size = CGM.getSize(Sz.alignTo(Align)); 4947 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4948 } 4949 // Need to allocate on the dynamic memory. 4950 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4951 // Use default allocator. 
4952 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4953 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4954 4955 llvm::Value *Addr = 4956 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4957 CGM.getModule(), OMPRTL___kmpc_alloc), 4958 Args, ".dep.arr.addr"); 4959 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4960 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4961 DependenciesArray = Address(Addr, Align); 4962 // Write number of elements in the first element of array for depobj. 4963 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4964 // deps[i].base_addr = NumDependencies; 4965 LValue BaseAddrLVal = CGF.EmitLValueForField( 4966 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4967 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4968 llvm::PointerUnion<unsigned *, LValue *> Pos; 4969 unsigned Idx = 1; 4970 LValue PosLVal; 4971 if (Dependencies.IteratorExpr) { 4972 PosLVal = CGF.MakeAddrLValue( 4973 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4974 C.getSizeType()); 4975 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4976 /*IsInit=*/true); 4977 Pos = &PosLVal; 4978 } else { 4979 Pos = &Idx; 4980 } 4981 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4982 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4983 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4984 return DependenciesArray; 4985 } 4986 4987 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4988 SourceLocation Loc) { 4989 ASTContext &C = CGM.getContext(); 4990 QualType FlagsTy; 4991 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4992 LValue Base = CGF.EmitLoadOfPointerLValue( 4993 DepobjLVal.getAddress(CGF), 4994 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4995 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4996 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4997 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4998 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4999 Addr.getPointer(), 5000 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5001 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5002 CGF.VoidPtrTy); 5003 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5004 // Use default allocator. 5005 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5006 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5007 5008 // _kmpc_free(gtid, addr, nullptr); 5009 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5010 CGM.getModule(), OMPRTL___kmpc_free), 5011 Args); 5012 } 5013 5014 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5015 OpenMPDependClauseKind NewDepKind, 5016 SourceLocation Loc) { 5017 ASTContext &C = CGM.getContext(); 5018 QualType FlagsTy; 5019 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5020 RecordDecl *KmpDependInfoRD = 5021 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5022 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5023 llvm::Value *NumDeps; 5024 LValue Base; 5025 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5026 5027 Address Begin = Base.getAddress(CGF); 5028 // Cast from pointer to array type to pointer to single element. 5029 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5030 // The basic structure here is a while-do loop. 
5031 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5032 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5033 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5034 CGF.EmitBlock(BodyBB); 5035 llvm::PHINode *ElementPHI = 5036 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5037 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5038 Begin = Address(ElementPHI, Begin.getAlignment()); 5039 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5040 Base.getTBAAInfo()); 5041 // deps[i].flags = NewDepKind; 5042 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5043 LValue FlagsLVal = CGF.EmitLValueForField( 5044 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5045 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5046 FlagsLVal); 5047 5048 // Shift the address forward by one element. 5049 Address ElementNext = 5050 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5051 ElementPHI->addIncoming(ElementNext.getPointer(), 5052 CGF.Builder.GetInsertBlock()); 5053 llvm::Value *IsEmpty = 5054 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5055 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5056 // Done. 
5057 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5058 } 5059 5060 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5061 const OMPExecutableDirective &D, 5062 llvm::Function *TaskFunction, 5063 QualType SharedsTy, Address Shareds, 5064 const Expr *IfCond, 5065 const OMPTaskDataTy &Data) { 5066 if (!CGF.HaveInsertPoint()) 5067 return; 5068 5069 TaskResultTy Result = 5070 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5071 llvm::Value *NewTask = Result.NewTask; 5072 llvm::Function *TaskEntry = Result.TaskEntry; 5073 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5074 LValue TDBase = Result.TDBase; 5075 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5076 // Process list of dependences. 5077 Address DependenciesArray = Address::invalid(); 5078 llvm::Value *NumOfElements; 5079 std::tie(NumOfElements, DependenciesArray) = 5080 emitDependClause(CGF, Data.Dependences, Loc); 5081 5082 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5083 // libcall. 
5084 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5085 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5086 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5087 // list is not empty 5088 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5089 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5090 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5091 llvm::Value *DepTaskArgs[7]; 5092 if (!Data.Dependences.empty()) { 5093 DepTaskArgs[0] = UpLoc; 5094 DepTaskArgs[1] = ThreadID; 5095 DepTaskArgs[2] = NewTask; 5096 DepTaskArgs[3] = NumOfElements; 5097 DepTaskArgs[4] = DependenciesArray.getPointer(); 5098 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5099 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5100 } 5101 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5102 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5103 if (!Data.Tied) { 5104 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5105 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5106 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5107 } 5108 if (!Data.Dependences.empty()) { 5109 CGF.EmitRuntimeCall( 5110 OMPBuilder.getOrCreateRuntimeFunction( 5111 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5112 DepTaskArgs); 5113 } else { 5114 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5115 CGM.getModule(), OMPRTL___kmpc_omp_task), 5116 TaskArgs); 5117 } 5118 // Check if parent region is untied and build return for untied task; 5119 if (auto *Region = 5120 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5121 Region->emitUntiedSwitch(CGF); 5122 }; 5123 5124 llvm::Value *DepWaitTaskArgs[6]; 5125 if (!Data.Dependences.empty()) { 5126 DepWaitTaskArgs[0] = UpLoc; 5127 DepWaitTaskArgs[1] = ThreadID; 5128 DepWaitTaskArgs[2] = NumOfElements; 5129 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5130 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5131 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5132 } 5133 auto &M = CGM.getModule(); 5134 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5135 TaskEntry, &Data, &DepWaitTaskArgs, 5136 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5137 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5138 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5139 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5140 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5141 // is specified. 5142 if (!Data.Dependences.empty()) 5143 CGF.EmitRuntimeCall( 5144 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5145 DepWaitTaskArgs); 5146 // Call proxy_task_entry(gtid, new_task); 5147 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5148 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5149 Action.Enter(CGF); 5150 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5151 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5152 OutlinedFnArgs); 5153 }; 5154 5155 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5156 // kmp_task_t *new_task); 5157 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5158 // kmp_task_t *new_task); 5159 RegionCodeGenTy RCG(CodeGen); 5160 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5161 M, OMPRTL___kmpc_omp_task_begin_if0), 5162 TaskArgs, 5163 OMPBuilder.getOrCreateRuntimeFunction( 5164 M, OMPRTL___kmpc_omp_task_complete_if0), 5165 TaskArgs); 5166 RCG.setAction(Action); 5167 RCG(CGF); 5168 }; 5169 5170 if (IfCond) { 5171 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5172 } else { 5173 RegionCodeGenTy ThenRCG(ThenCodeGen); 5174 ThenRCG(CGF); 5175 } 5176 } 5177 5178 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5179 const OMPLoopDirective &D, 5180 llvm::Function *TaskFunction, 5181 
QualType SharedsTy, Address Shareds, 5182 const Expr *IfCond, 5183 const OMPTaskDataTy &Data) { 5184 if (!CGF.HaveInsertPoint()) 5185 return; 5186 TaskResultTy Result = 5187 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5188 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5189 // libcall. 5190 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5191 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5192 // sched, kmp_uint64 grainsize, void *task_dup); 5193 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5194 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5195 llvm::Value *IfVal; 5196 if (IfCond) { 5197 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5198 /*isSigned=*/true); 5199 } else { 5200 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5201 } 5202 5203 LValue LBLVal = CGF.EmitLValueForField( 5204 Result.TDBase, 5205 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5206 const auto *LBVar = 5207 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5208 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5209 LBLVal.getQuals(), 5210 /*IsInitializer=*/true); 5211 LValue UBLVal = CGF.EmitLValueForField( 5212 Result.TDBase, 5213 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5214 const auto *UBVar = 5215 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5216 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5217 UBLVal.getQuals(), 5218 /*IsInitializer=*/true); 5219 LValue StLVal = CGF.EmitLValueForField( 5220 Result.TDBase, 5221 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5222 const auto *StVar = 5223 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5224 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5225 StLVal.getQuals(), 5226 /*IsInitializer=*/true); 5227 // 
Store reductions address. 5228 LValue RedLVal = CGF.EmitLValueForField( 5229 Result.TDBase, 5230 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5231 if (Data.Reductions) { 5232 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5233 } else { 5234 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5235 CGF.getContext().VoidPtrTy); 5236 } 5237 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5238 llvm::Value *TaskArgs[] = { 5239 UpLoc, 5240 ThreadID, 5241 Result.NewTask, 5242 IfVal, 5243 LBLVal.getPointer(CGF), 5244 UBLVal.getPointer(CGF), 5245 CGF.EmitLoadOfScalar(StLVal, Loc), 5246 llvm::ConstantInt::getSigned( 5247 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5248 llvm::ConstantInt::getSigned( 5249 CGF.IntTy, Data.Schedule.getPointer() 5250 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5251 : NoSchedule), 5252 Data.Schedule.getPointer() 5253 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5254 /*isSigned=*/false) 5255 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5256 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5257 Result.TaskDupFn, CGF.VoidPtrTy) 5258 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5259 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5260 CGM.getModule(), OMPRTL___kmpc_taskloop), 5261 TaskArgs); 5262 } 5263 5264 /// Emit reduction operation for each element of array (required for 5265 /// array sections) LHS op = RHS. 5266 /// \param Type Type of array. 5267 /// \param LHSVar Variable on the left side of the reduction operation 5268 /// (references element of array in original variable). 5269 /// \param RHSVar Variable on the right side of the reduction operation 5270 /// (references element of array in original variable). 5271 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5272 /// RHSVar. 
5273 static void EmitOMPAggregateReduction( 5274 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5275 const VarDecl *RHSVar, 5276 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5277 const Expr *, const Expr *)> &RedOpGen, 5278 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5279 const Expr *UpExpr = nullptr) { 5280 // Perform element-by-element initialization. 5281 QualType ElementTy; 5282 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5283 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5284 5285 // Drill down to the base element type on both arrays. 5286 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5287 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5288 5289 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5290 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5291 // Cast from pointer to array type to pointer to single element. 5292 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5293 // The basic structure here is a while-do loop. 5294 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5295 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5296 llvm::Value *IsEmpty = 5297 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5298 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5299 5300 // Enter the loop body, making that address the current address. 
5301 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5302 CGF.EmitBlock(BodyBB); 5303 5304 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5305 5306 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5307 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5308 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5309 Address RHSElementCurrent = 5310 Address(RHSElementPHI, 5311 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5312 5313 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5314 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5315 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5316 Address LHSElementCurrent = 5317 Address(LHSElementPHI, 5318 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5319 5320 // Emit copy. 5321 CodeGenFunction::OMPPrivateScope Scope(CGF); 5322 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5323 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5324 Scope.Privatize(); 5325 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5326 Scope.ForceCleanup(); 5327 5328 // Shift the address forward by one element. 5329 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5330 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5331 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5332 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5333 // Check whether we've reached the end. 5334 llvm::Value *Done = 5335 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5336 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5337 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5338 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5339 5340 // Done. 5341 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5342 } 5343 5344 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5345 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5346 /// UDR combiner function. 5347 static void emitReductionCombiner(CodeGenFunction &CGF, 5348 const Expr *ReductionOp) { 5349 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5350 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5351 if (const auto *DRE = 5352 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5353 if (const auto *DRD = 5354 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5355 std::pair<llvm::Function *, llvm::Function *> Reduction = 5356 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5357 RValue Func = RValue::get(Reduction.first); 5358 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5359 CGF.EmitIgnoredExpr(ReductionOp); 5360 return; 5361 } 5362 CGF.EmitIgnoredExpr(ReductionOp); 5363 } 5364 5365 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5366 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5367 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5368 ArrayRef<const Expr *> ReductionOps) { 5369 ASTContext &C = CGM.getContext(); 5370 5371 // void reduction_func(void *LHSArg, void *RHSArg); 5372 FunctionArgList Args; 5373 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5374 ImplicitParamDecl::Other); 5375 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5376 ImplicitParamDecl::Other); 5377 Args.push_back(&LHSArg); 5378 Args.push_back(&RHSArg); 5379 const auto &CGFI = 5380 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5381 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5382 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5383 llvm::GlobalValue::InternalLinkage, Name, 5384 &CGM.getModule()); 5385 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5386 Fn->setDoesNotRecurse(); 
5387 CodeGenFunction CGF(CGM); 5388 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5389 5390 // Dst = (void*[n])(LHSArg); 5391 // Src = (void*[n])(RHSArg); 5392 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5393 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5394 ArgsType), CGF.getPointerAlign()); 5395 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5396 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5397 ArgsType), CGF.getPointerAlign()); 5398 5399 // ... 5400 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5401 // ... 5402 CodeGenFunction::OMPPrivateScope Scope(CGF); 5403 auto IPriv = Privates.begin(); 5404 unsigned Idx = 0; 5405 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5406 const auto *RHSVar = 5407 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5408 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5409 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5410 }); 5411 const auto *LHSVar = 5412 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5413 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5414 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5415 }); 5416 QualType PrivTy = (*IPriv)->getType(); 5417 if (PrivTy->isVariablyModifiedType()) { 5418 // Get array size and emit VLA type. 
5419 ++Idx; 5420 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5421 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5422 const VariableArrayType *VLA = 5423 CGF.getContext().getAsVariableArrayType(PrivTy); 5424 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5425 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5426 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5427 CGF.EmitVariablyModifiedType(PrivTy); 5428 } 5429 } 5430 Scope.Privatize(); 5431 IPriv = Privates.begin(); 5432 auto ILHS = LHSExprs.begin(); 5433 auto IRHS = RHSExprs.begin(); 5434 for (const Expr *E : ReductionOps) { 5435 if ((*IPriv)->getType()->isArrayType()) { 5436 // Emit reduction for array section. 5437 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5438 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5439 EmitOMPAggregateReduction( 5440 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5441 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5442 emitReductionCombiner(CGF, E); 5443 }); 5444 } else { 5445 // Emit reduction for array subscript or single variable. 5446 emitReductionCombiner(CGF, E); 5447 } 5448 ++IPriv; 5449 ++ILHS; 5450 ++IRHS; 5451 } 5452 Scope.ForceCleanup(); 5453 CGF.FinishFunction(); 5454 return Fn; 5455 } 5456 5457 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5458 const Expr *ReductionOp, 5459 const Expr *PrivateRef, 5460 const DeclRefExpr *LHS, 5461 const DeclRefExpr *RHS) { 5462 if (PrivateRef->getType()->isArrayType()) { 5463 // Emit reduction for array section. 
5464 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5465 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5466 EmitOMPAggregateReduction( 5467 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5468 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5469 emitReductionCombiner(CGF, ReductionOp); 5470 }); 5471 } else { 5472 // Emit reduction for array subscript or single variable. 5473 emitReductionCombiner(CGF, ReductionOp); 5474 } 5475 } 5476 5477 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5478 ArrayRef<const Expr *> Privates, 5479 ArrayRef<const Expr *> LHSExprs, 5480 ArrayRef<const Expr *> RHSExprs, 5481 ArrayRef<const Expr *> ReductionOps, 5482 ReductionOptionsTy Options) { 5483 if (!CGF.HaveInsertPoint()) 5484 return; 5485 5486 bool WithNowait = Options.WithNowait; 5487 bool SimpleReduction = Options.SimpleReduction; 5488 5489 // Next code should be emitted for reduction: 5490 // 5491 // static kmp_critical_name lock = { 0 }; 5492 // 5493 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5494 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5495 // ... 5496 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5497 // *(Type<n>-1*)rhs[<n>-1]); 5498 // } 5499 // 5500 // ... 5501 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5502 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5503 // RedList, reduce_func, &<lock>)) { 5504 // case 1: 5505 // ... 5506 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5507 // ... 5508 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5509 // break; 5510 // case 2: 5511 // ... 5512 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5513 // ... 5514 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5515 // break; 5516 // default:; 5517 // } 5518 // 5519 // if SimpleReduction is true, only the next code is generated: 5520 // ... 
5521 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5522 // ... 5523 5524 ASTContext &C = CGM.getContext(); 5525 5526 if (SimpleReduction) { 5527 CodeGenFunction::RunCleanupsScope Scope(CGF); 5528 auto IPriv = Privates.begin(); 5529 auto ILHS = LHSExprs.begin(); 5530 auto IRHS = RHSExprs.begin(); 5531 for (const Expr *E : ReductionOps) { 5532 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5533 cast<DeclRefExpr>(*IRHS)); 5534 ++IPriv; 5535 ++ILHS; 5536 ++IRHS; 5537 } 5538 return; 5539 } 5540 5541 // 1. Build a list of reduction variables. 5542 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5543 auto Size = RHSExprs.size(); 5544 for (const Expr *E : Privates) { 5545 if (E->getType()->isVariablyModifiedType()) 5546 // Reserve place for array size. 5547 ++Size; 5548 } 5549 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5550 QualType ReductionArrayTy = 5551 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5552 /*IndexTypeQuals=*/0); 5553 Address ReductionList = 5554 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5555 auto IPriv = Privates.begin(); 5556 unsigned Idx = 0; 5557 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5558 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5559 CGF.Builder.CreateStore( 5560 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5561 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5562 Elem); 5563 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5564 // Store array size. 5565 ++Idx; 5566 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5567 llvm::Value *Size = CGF.Builder.CreateIntCast( 5568 CGF.getVLASize( 5569 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5570 .NumElts, 5571 CGF.SizeTy, /*isSigned=*/false); 5572 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5573 Elem); 5574 } 5575 } 5576 5577 // 2. 
Emit reduce_func(). 5578 llvm::Function *ReductionFn = emitReductionFunction( 5579 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5580 LHSExprs, RHSExprs, ReductionOps); 5581 5582 // 3. Create static kmp_critical_name lock = { 0 }; 5583 std::string Name = getName({"reduction"}); 5584 llvm::Value *Lock = getCriticalRegionLock(Name); 5585 5586 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5587 // RedList, reduce_func, &<lock>); 5588 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5589 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5590 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5591 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5592 ReductionList.getPointer(), CGF.VoidPtrTy); 5593 llvm::Value *Args[] = { 5594 IdentTLoc, // ident_t *<loc> 5595 ThreadId, // i32 <gtid> 5596 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5597 ReductionArrayTySize, // size_type sizeof(RedList) 5598 RL, // void *RedList 5599 ReductionFn, // void (*) (void *, void *) <reduce_func> 5600 Lock // kmp_critical_name *&<lock> 5601 }; 5602 llvm::Value *Res = CGF.EmitRuntimeCall( 5603 OMPBuilder.getOrCreateRuntimeFunction( 5604 CGM.getModule(), 5605 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5606 Args); 5607 5608 // 5. Build switch(res) 5609 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5610 llvm::SwitchInst *SwInst = 5611 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5612 5613 // 6. Build case 1: 5614 // ... 5615 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5616 // ... 
5617 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5618 // break; 5619 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5620 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5621 CGF.EmitBlock(Case1BB); 5622 5623 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5624 llvm::Value *EndArgs[] = { 5625 IdentTLoc, // ident_t *<loc> 5626 ThreadId, // i32 <gtid> 5627 Lock // kmp_critical_name *&<lock> 5628 }; 5629 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5630 CodeGenFunction &CGF, PrePostActionTy &Action) { 5631 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5632 auto IPriv = Privates.begin(); 5633 auto ILHS = LHSExprs.begin(); 5634 auto IRHS = RHSExprs.begin(); 5635 for (const Expr *E : ReductionOps) { 5636 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5637 cast<DeclRefExpr>(*IRHS)); 5638 ++IPriv; 5639 ++ILHS; 5640 ++IRHS; 5641 } 5642 }; 5643 RegionCodeGenTy RCG(CodeGen); 5644 CommonActionTy Action( 5645 nullptr, llvm::None, 5646 OMPBuilder.getOrCreateRuntimeFunction( 5647 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5648 : OMPRTL___kmpc_end_reduce), 5649 EndArgs); 5650 RCG.setAction(Action); 5651 RCG(CGF); 5652 5653 CGF.EmitBranch(DefaultBB); 5654 5655 // 7. Build case 2: 5656 // ... 5657 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5658 // ... 
5659 // break; 5660 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5661 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5662 CGF.EmitBlock(Case2BB); 5663 5664 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5665 CodeGenFunction &CGF, PrePostActionTy &Action) { 5666 auto ILHS = LHSExprs.begin(); 5667 auto IRHS = RHSExprs.begin(); 5668 auto IPriv = Privates.begin(); 5669 for (const Expr *E : ReductionOps) { 5670 const Expr *XExpr = nullptr; 5671 const Expr *EExpr = nullptr; 5672 const Expr *UpExpr = nullptr; 5673 BinaryOperatorKind BO = BO_Comma; 5674 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5675 if (BO->getOpcode() == BO_Assign) { 5676 XExpr = BO->getLHS(); 5677 UpExpr = BO->getRHS(); 5678 } 5679 } 5680 // Try to emit update expression as a simple atomic. 5681 const Expr *RHSExpr = UpExpr; 5682 if (RHSExpr) { 5683 // Analyze RHS part of the whole expression. 5684 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5685 RHSExpr->IgnoreParenImpCasts())) { 5686 // If this is a conditional operator, analyze its condition for 5687 // min/max reduction operator. 
5688 RHSExpr = ACO->getCond(); 5689 } 5690 if (const auto *BORHS = 5691 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5692 EExpr = BORHS->getRHS(); 5693 BO = BORHS->getOpcode(); 5694 } 5695 } 5696 if (XExpr) { 5697 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5698 auto &&AtomicRedGen = [BO, VD, 5699 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5700 const Expr *EExpr, const Expr *UpExpr) { 5701 LValue X = CGF.EmitLValue(XExpr); 5702 RValue E; 5703 if (EExpr) 5704 E = CGF.EmitAnyExpr(EExpr); 5705 CGF.EmitOMPAtomicSimpleUpdateExpr( 5706 X, E, BO, /*IsXLHSInRHSPart=*/true, 5707 llvm::AtomicOrdering::Monotonic, Loc, 5708 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5709 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5710 PrivateScope.addPrivate( 5711 VD, [&CGF, VD, XRValue, Loc]() { 5712 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5713 CGF.emitOMPSimpleStore( 5714 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5715 VD->getType().getNonReferenceType(), Loc); 5716 return LHSTemp; 5717 }); 5718 (void)PrivateScope.Privatize(); 5719 return CGF.EmitAnyExpr(UpExpr); 5720 }); 5721 }; 5722 if ((*IPriv)->getType()->isArrayType()) { 5723 // Emit atomic reduction for array section. 5724 const auto *RHSVar = 5725 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5726 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5727 AtomicRedGen, XExpr, EExpr, UpExpr); 5728 } else { 5729 // Emit atomic reduction for array subscript or single variable. 5730 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5731 } 5732 } else { 5733 // Emit as a critical region. 
5734 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5735 const Expr *, const Expr *) { 5736 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5737 std::string Name = RT.getName({"atomic_reduction"}); 5738 RT.emitCriticalRegion( 5739 CGF, Name, 5740 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5741 Action.Enter(CGF); 5742 emitReductionCombiner(CGF, E); 5743 }, 5744 Loc); 5745 }; 5746 if ((*IPriv)->getType()->isArrayType()) { 5747 const auto *LHSVar = 5748 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5749 const auto *RHSVar = 5750 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5751 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5752 CritRedGen); 5753 } else { 5754 CritRedGen(CGF, nullptr, nullptr, nullptr); 5755 } 5756 } 5757 ++ILHS; 5758 ++IRHS; 5759 ++IPriv; 5760 } 5761 }; 5762 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5763 if (!WithNowait) { 5764 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5765 llvm::Value *EndArgs[] = { 5766 IdentTLoc, // ident_t *<loc> 5767 ThreadId, // i32 <gtid> 5768 Lock // kmp_critical_name *&<lock> 5769 }; 5770 CommonActionTy Action(nullptr, llvm::None, 5771 OMPBuilder.getOrCreateRuntimeFunction( 5772 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5773 EndArgs); 5774 AtomicRCG.setAction(Action); 5775 AtomicRCG(CGF); 5776 } else { 5777 AtomicRCG(CGF); 5778 } 5779 5780 CGF.EmitBranch(DefaultBB); 5781 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5782 } 5783 5784 /// Generates unique name for artificial threadprivate variables. 5785 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// \param CGM Module used for name mangling and to reach the OpenMP runtime's
/// getName().
/// \param Prefix Text written in front of the generated name.
/// \param Ref Expression naming the variable. getBaseDecl() is tried first;
/// when it yields null, \p Ref itself must be a DeclRefExpr to a VarDecl.
/// \return \p Prefix, followed by the getName() form of the canonical
/// declaration's name (the plain identifier for locals and parameters, the
/// mangled name otherwise), followed by "_" and the raw encoding of the
/// canonical declaration's begin location.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  // Passed to getBaseDecl() (presumably filled in as an out-parameter); it is
  // not read again in this function.
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  // Always name the canonical declaration, so every redeclaration of the same
  // variable maps to one name.
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
/// \param CGM Module the new function is added to.
/// \param Loc Location used for the implicit parameters, for the start of the
/// function and for the load of the item size.
/// \param RCG Reduction codegen helper that is queried for item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \return The newly created function (internal linkage, marked as
/// non-recursive).
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is generated with a fresh CodeGenFunction of its own.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy: the pointer value held by the first
  // parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise a null pointer stands in for it.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  // NOTE(review): the trailing callback always answers false; what that means
  // is defined by ReductionCodeGen::emitInitialization - confirm there.
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
/// \param CGM Module the new function is added to.
/// \param Loc Location used for the implicit parameters, for the start of the
/// function and for the load of the item size.
/// \param RCG Reduction codegen helper that is queried for item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \param ReductionOp Combiner expression forwarded to
/// emitSingleReductionCombiner().
/// \param LHS,RHS Must be DeclRefExprs to VarDecls; inside the new function
/// those variables are remapped to the first and second argument.
/// \param PrivateRef Forwarded to emitSingleReductionCombiner().
/// \return The newly created function (internal linkage, marked as
/// non-recursive).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is generated with a fresh CodeGenFunction of its own.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
/// \param CGM Module the new function is added to.
/// \param Loc Location used for the implicit parameter, for the start and the
/// end of the function and for the load of the item size.
/// \param RCG Reduction codegen helper that is queried for item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \return nullptr when RCG.needCleanups(N) is false; otherwise the newly
/// created function (internal linkage, marked as non-recursive).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is generated for items that need no cleanup.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is generated with a fresh CodeGenFunction of its own.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy: the pointer value held by the parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Fills a temporary array of kmp_taskred_input_t records, one per entry of
/// Data.ReductionVars, and passes it to the runtime.
/// \p LHSExprs and \p RHSExprs are indexed with the same counter as the
/// reduction lists in \p Data.
/// \return nullptr when there is no insert point or Data.ReductionVars is
/// empty; otherwise the result of the call to __kmpc_taskred_modifier_init
/// (when Data.IsReductionWithTaskMod is set) or to __kmpc_taskred_init.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields are added in exactly this order; 'flags' is modelled as an
  // unsigned 32-bit integer.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when no finalizer was generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the call to __kmpc_task_reduction_modifier_fini for the current
/// thread. \p IsWorksharingReduction is passed on as a 0/1 integer.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int
  // is_ws); whatever the call returns is discarded.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the run-time size of reduction item \p N of \p RCG into the
/// artificial threadprivate variable that the generated init/comb/fini
/// functions read it back from (same generateUniqueName() key). Nothing is
/// emitted for items without a run-time size. \p Loc is not used by this body.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Emits the call to __kmpc_task_reduction_get_th_data for the shared item
/// \p SharedLVal.
/// \param ReductionsPtr Passed unchanged as the second ('tg') argument.
/// \return An Address made of the call's result and SharedLVal's alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits a taskwait: through OMPBuilder.createTaskwait() when the
/// OpenMPIRBuilder language option is set, otherwise as a direct call to
/// __kmpc_omp_taskwait. Afterwards emitUntiedSwitch() is invoked on the
/// enclosing CGOpenMPRegionInfo, if there is one. Does nothing without an
/// insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits \p CodeGen in place, without outlining: an InlinedOpenMPRegionRAII is
/// alive for the duration, and the body is emitted through
/// CGF.CapturedStmtInfo (presumably the region info installed by that RAII
/// object - see its definition). Does nothing without an insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the 'cncl_kind' argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancelled region to its RTCancelKind.
/// Anything other than parallel, for and sections is asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a cancellation point for \p CancelRegion: a call to
/// __kmpc_cancellationpoint followed by a conditional exit from the construct.
/// Nothing is emitted without an insert point, outside of a
/// CGOpenMPRegionInfo, or when the region has no cancel and \p CancelRegion is
/// not taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a cancel of \p CancelRegion: a call to __kmpc_cancel followed by a
/// conditional exit from the construct. With a non-null \p IfCond the sequence
/// is emitted through emitIfClause() with an empty else branch. Nothing is
/// emitted without an insert point or outside of a CGOpenMPRegionInfo.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is tested below: a non-zero value takes the exit path.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator, allocator traits) expression pairs, in the order they are
  /// initialized on Enter() and destroyed on Exit(). Held as an ArrayRef, so
  /// the storage belongs to whoever constructed this action.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emits emitUsesAllocatorsInit() for every pair; no-op without an insert
  /// point.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emits emitUsesAllocatorsFini() for every allocator; no-op without an
  /// insert point.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Outlines the target region of \p D. The allocators of all uses_allocators
/// clauses that carry traits are collected first and attached to \p CodeGen
/// as a pre/post action; the outlining itself is delegated to
/// emitTargetOutlinedFunctionHelper(). Also sets HasEmittedTargetRegion.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // Note: this 'D' shadows the directive parameter 'D' inside the loop
      // body.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits get no init/fini action.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  // The action only views 'Allocators'; both live until the helper returns.
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emits the initialization of one uses_allocators allocator: calls
/// __kmpc_init_allocator with the thread id, a null memspace handle, the
/// number of traits and the traits pointer, then emits the allocator variable
/// and stores the converted result into it.
/// \param Allocator After IgnoreParenImpCasts() this must be a DeclRefExpr to
/// a VarDecl.
/// \param AllocatorTraits Must have a constant array type; its element count
/// is passed as the number of traits.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits lvalue as a 'void *' object and load it as a
  // scalar; the loaded value is the 'Traits' argument.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. The variable itself is emitted here first.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the 'void *' result to the allocator's own type before storing.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emits the destruction of one uses_allocators allocator: loads the
/// allocator value, converts it to 'void *' and passes it, with the thread
/// id, to __kmpc_destroy_allocator. Whatever the call returns is discarded.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Generates the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, its region ID and offload entry registration.
/// \param ParentName Name of the enclosing function; becomes part of the entry
/// name.
/// \param[out] OutlinedFn Always set to the generated function.
/// \param[out] OutlinedFnID Only assigned when \p IsOffloadEntry is true: the
/// function address bitcast to i8* on the device, otherwise a new constant i8
/// global named after the entry.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined body is generated with a CodeGenFunction of its own; the
  // target region info stays installed on it while CapInfoRAII is alive.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak (WeakAnyLinkage), non-dso_local linkage in case
  // we are emitting code for the device, because these functions will be entry
  // points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
/// \return true only if \p E additionally has no side effects, counting
/// possible ones.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
6512 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6513 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6514 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6515 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6516 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6517 isa<UsingDirectiveDecl>(D) || 6518 isa<OMPDeclareReductionDecl>(D) || 6519 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6520 return true; 6521 const auto *VD = dyn_cast<VarDecl>(D); 6522 if (!VD) 6523 return false; 6524 return VD->isConstexpr() || 6525 ((VD->getType().isTrivialType(Ctx) || 6526 VD->getType()->isReferenceType()) && 6527 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6528 })) 6529 continue; 6530 } 6531 // Found multiple children - cannot get the one child only. 6532 if (Child) 6533 return nullptr; 6534 Child = S; 6535 } 6536 if (Child) 6537 Child = Child->IgnoreContainers(); 6538 } 6539 return Child; 6540 } 6541 6542 /// Emit the number of teams for a target directive. Inspect the num_teams 6543 /// clause associated with a teams construct combined or closely nested 6544 /// with the target directive. 6545 /// 6546 /// Emit a team of size one for directives such as 'target parallel' that 6547 /// have no associated teams construct. 6548 /// 6549 /// Otherwise, return nullptr. 
6550 static llvm::Value * 6551 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6552 const OMPExecutableDirective &D) { 6553 assert(!CGF.getLangOpts().OpenMPIsDevice && 6554 "Clauses associated with the teams directive expected to be emitted " 6555 "only for the host!"); 6556 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6557 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6558 "Expected target-based executable directive."); 6559 CGBuilderTy &Bld = CGF.Builder; 6560 switch (DirectiveKind) { 6561 case OMPD_target: { 6562 const auto *CS = D.getInnermostCapturedStmt(); 6563 const auto *Body = 6564 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6565 const Stmt *ChildStmt = 6566 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6567 if (const auto *NestedDir = 6568 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6569 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6570 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6571 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6572 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6573 const Expr *NumTeams = 6574 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6575 llvm::Value *NumTeamsVal = 6576 CGF.EmitScalarExpr(NumTeams, 6577 /*IgnoreResultAssign*/ true); 6578 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6579 /*isSigned=*/true); 6580 } 6581 return Bld.getInt32(0); 6582 } 6583 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6584 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6585 return Bld.getInt32(1); 6586 return Bld.getInt32(0); 6587 } 6588 return nullptr; 6589 } 6590 case OMPD_target_teams: 6591 case OMPD_target_teams_distribute: 6592 case OMPD_target_teams_distribute_simd: 6593 case OMPD_target_teams_distribute_parallel_for: 6594 case OMPD_target_teams_distribute_parallel_for_simd: { 6595 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6596 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6597 const Expr *NumTeams = 6598 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6599 llvm::Value *NumTeamsVal = 6600 CGF.EmitScalarExpr(NumTeams, 6601 /*IgnoreResultAssign*/ true); 6602 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6603 /*isSigned=*/true); 6604 } 6605 return Bld.getInt32(0); 6606 } 6607 case OMPD_target_parallel: 6608 case OMPD_target_parallel_for: 6609 case OMPD_target_parallel_for_simd: 6610 case OMPD_target_simd: 6611 return Bld.getInt32(1); 6612 case OMPD_parallel: 6613 case OMPD_for: 6614 case OMPD_parallel_for: 6615 case OMPD_parallel_master: 6616 case OMPD_parallel_sections: 6617 case OMPD_for_simd: 6618 case OMPD_parallel_for_simd: 6619 case OMPD_cancel: 6620 case OMPD_cancellation_point: 6621 case OMPD_ordered: 6622 case OMPD_threadprivate: 6623 case OMPD_allocate: 6624 case OMPD_task: 6625 case OMPD_simd: 6626 case OMPD_sections: 6627 case OMPD_section: 6628 case OMPD_single: 6629 case OMPD_master: 6630 case OMPD_critical: 6631 case OMPD_taskyield: 6632 case OMPD_barrier: 6633 case OMPD_taskwait: 6634 case OMPD_taskgroup: 6635 case OMPD_atomic: 6636 case OMPD_flush: 6637 case OMPD_depobj: 6638 case OMPD_scan: 6639 case OMPD_teams: 6640 case OMPD_target_data: 6641 case OMPD_target_exit_data: 6642 case OMPD_target_enter_data: 6643 case OMPD_distribute: 6644 case OMPD_distribute_simd: 6645 case OMPD_distribute_parallel_for: 6646 case OMPD_distribute_parallel_for_simd: 6647 case OMPD_teams_distribute: 6648 case OMPD_teams_distribute_simd: 6649 case OMPD_teams_distribute_parallel_for: 6650 case OMPD_teams_distribute_parallel_for_simd: 6651 case OMPD_target_update: 6652 case OMPD_declare_simd: 6653 case OMPD_declare_variant: 6654 case OMPD_begin_declare_variant: 6655 case OMPD_end_declare_variant: 6656 case OMPD_declare_target: 6657 case OMPD_end_declare_target: 6658 case OMPD_declare_reduction: 6659 case OMPD_declare_mapper: 6660 case OMPD_taskloop: 6661 case OMPD_taskloop_simd: 6662 case OMPD_master_taskloop: 
6663 case OMPD_master_taskloop_simd: 6664 case OMPD_parallel_master_taskloop: 6665 case OMPD_parallel_master_taskloop_simd: 6666 case OMPD_requires: 6667 case OMPD_unknown: 6668 break; 6669 default: 6670 break; 6671 } 6672 llvm_unreachable("Unexpected directive kind."); 6673 } 6674 6675 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6676 llvm::Value *DefaultThreadLimitVal) { 6677 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6678 CGF.getContext(), CS->getCapturedStmt()); 6679 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6680 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6681 llvm::Value *NumThreads = nullptr; 6682 llvm::Value *CondVal = nullptr; 6683 // Handle if clause. If if clause present, the number of threads is 6684 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6685 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6686 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6687 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6688 const OMPIfClause *IfClause = nullptr; 6689 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6690 if (C->getNameModifier() == OMPD_unknown || 6691 C->getNameModifier() == OMPD_parallel) { 6692 IfClause = C; 6693 break; 6694 } 6695 } 6696 if (IfClause) { 6697 const Expr *Cond = IfClause->getCondition(); 6698 bool Result; 6699 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6700 if (!Result) 6701 return CGF.Builder.getInt32(1); 6702 } else { 6703 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6704 if (const auto *PreInit = 6705 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6706 for (const auto *I : PreInit->decls()) { 6707 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6708 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6709 } else { 6710 CodeGenFunction::AutoVarEmission Emission = 6711 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6712 CGF.EmitAutoVarCleanups(Emission); 6713 } 6714 } 6715 } 6716 
CondVal = CGF.EvaluateExprAsBool(Cond); 6717 } 6718 } 6719 } 6720 // Check the value of num_threads clause iff if clause was not specified 6721 // or is not evaluated to false. 6722 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6723 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6724 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6725 const auto *NumThreadsClause = 6726 Dir->getSingleClause<OMPNumThreadsClause>(); 6727 CodeGenFunction::LexicalScope Scope( 6728 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6729 if (const auto *PreInit = 6730 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6731 for (const auto *I : PreInit->decls()) { 6732 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6733 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6734 } else { 6735 CodeGenFunction::AutoVarEmission Emission = 6736 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6737 CGF.EmitAutoVarCleanups(Emission); 6738 } 6739 } 6740 } 6741 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6742 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6743 /*isSigned=*/false); 6744 if (DefaultThreadLimitVal) 6745 NumThreads = CGF.Builder.CreateSelect( 6746 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6747 DefaultThreadLimitVal, NumThreads); 6748 } else { 6749 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6750 : CGF.Builder.getInt32(0); 6751 } 6752 // Process condition of the if clause. 6753 if (CondVal) { 6754 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6755 CGF.Builder.getInt32(1)); 6756 } 6757 return NumThreads; 6758 } 6759 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6760 return CGF.Builder.getInt32(1); 6761 return DefaultThreadLimitVal; 6762 } 6763 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6764 : CGF.Builder.getInt32(0); 6765 } 6766 6767 /// Emit the number of threads for a target directive. 
Inspect the 6768 /// thread_limit clause associated with a teams construct combined or closely 6769 /// nested with the target directive. 6770 /// 6771 /// Emit the num_threads clause for directives such as 'target parallel' that 6772 /// have no associated teams construct. 6773 /// 6774 /// Otherwise, return nullptr. 6775 static llvm::Value * 6776 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6777 const OMPExecutableDirective &D) { 6778 assert(!CGF.getLangOpts().OpenMPIsDevice && 6779 "Clauses associated with the teams directive expected to be emitted " 6780 "only for the host!"); 6781 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6782 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6783 "Expected target-based executable directive."); 6784 CGBuilderTy &Bld = CGF.Builder; 6785 llvm::Value *ThreadLimitVal = nullptr; 6786 llvm::Value *NumThreadsVal = nullptr; 6787 switch (DirectiveKind) { 6788 case OMPD_target: { 6789 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6790 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6791 return NumThreads; 6792 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6793 CGF.getContext(), CS->getCapturedStmt()); 6794 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6795 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6796 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6797 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6798 const auto *ThreadLimitClause = 6799 Dir->getSingleClause<OMPThreadLimitClause>(); 6800 CodeGenFunction::LexicalScope Scope( 6801 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6802 if (const auto *PreInit = 6803 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6804 for (const auto *I : PreInit->decls()) { 6805 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6806 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6807 } else { 6808 CodeGenFunction::AutoVarEmission Emission = 6809 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6810 CGF.EmitAutoVarCleanups(Emission); 6811 } 6812 } 6813 } 6814 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6815 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6816 ThreadLimitVal = 6817 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6818 } 6819 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6820 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6821 CS = Dir->getInnermostCapturedStmt(); 6822 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6823 CGF.getContext(), CS->getCapturedStmt()); 6824 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6825 } 6826 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6827 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6828 CS = Dir->getInnermostCapturedStmt(); 6829 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6830 return NumThreads; 6831 } 6832 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6833 return Bld.getInt32(1); 6834 } 6835 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6836 } 6837 case OMPD_target_teams: { 6838 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6839 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6840 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6841 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6842 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6843 ThreadLimitVal = 6844 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6845 } 6846 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6847 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6848 return NumThreads; 6849 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6850 CGF.getContext(), CS->getCapturedStmt()); 6851 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6852 if (Dir->getDirectiveKind() == OMPD_distribute) { 6853 CS = Dir->getInnermostCapturedStmt(); 6854 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6855 return NumThreads; 6856 } 6857 } 6858 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6859 } 6860 case OMPD_target_teams_distribute: 6861 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6862 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6863 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6864 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6865 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6866 ThreadLimitVal = 6867 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6868 } 6869 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6870 case OMPD_target_parallel: 6871 case OMPD_target_parallel_for: 6872 case OMPD_target_parallel_for_simd: 6873 case OMPD_target_teams_distribute_parallel_for: 6874 case OMPD_target_teams_distribute_parallel_for_simd: { 6875 llvm::Value *CondVal = nullptr; 6876 // Handle if clause. 
If if clause present, the number of threads is 6877 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6878 if (D.hasClausesOfKind<OMPIfClause>()) { 6879 const OMPIfClause *IfClause = nullptr; 6880 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6881 if (C->getNameModifier() == OMPD_unknown || 6882 C->getNameModifier() == OMPD_parallel) { 6883 IfClause = C; 6884 break; 6885 } 6886 } 6887 if (IfClause) { 6888 const Expr *Cond = IfClause->getCondition(); 6889 bool Result; 6890 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6891 if (!Result) 6892 return Bld.getInt32(1); 6893 } else { 6894 CodeGenFunction::RunCleanupsScope Scope(CGF); 6895 CondVal = CGF.EvaluateExprAsBool(Cond); 6896 } 6897 } 6898 } 6899 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6900 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6901 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6902 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6903 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6904 ThreadLimitVal = 6905 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6906 } 6907 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6908 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6909 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6910 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6911 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6912 NumThreadsVal = 6913 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6914 ThreadLimitVal = ThreadLimitVal 6915 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6916 ThreadLimitVal), 6917 NumThreadsVal, ThreadLimitVal) 6918 : NumThreadsVal; 6919 } 6920 if (!ThreadLimitVal) 6921 ThreadLimitVal = Bld.getInt32(0); 6922 if (CondVal) 6923 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6924 return ThreadLimitVal; 6925 } 6926 case OMPD_target_teams_distribute_simd: 6927 case OMPD_target_simd: 6928 return Bld.getInt32(1); 6929 case OMPD_parallel: 6930 case OMPD_for: 6931 case OMPD_parallel_for: 6932 case OMPD_parallel_master: 6933 case OMPD_parallel_sections: 6934 case OMPD_for_simd: 6935 case OMPD_parallel_for_simd: 6936 case OMPD_cancel: 6937 case OMPD_cancellation_point: 6938 case OMPD_ordered: 6939 case OMPD_threadprivate: 6940 case OMPD_allocate: 6941 case OMPD_task: 6942 case OMPD_simd: 6943 case OMPD_sections: 6944 case OMPD_section: 6945 case OMPD_single: 6946 case OMPD_master: 6947 case OMPD_critical: 6948 case OMPD_taskyield: 6949 case OMPD_barrier: 6950 case OMPD_taskwait: 6951 case OMPD_taskgroup: 6952 case OMPD_atomic: 6953 case OMPD_flush: 6954 case OMPD_depobj: 6955 case OMPD_scan: 6956 case OMPD_teams: 6957 case OMPD_target_data: 6958 case OMPD_target_exit_data: 6959 case OMPD_target_enter_data: 6960 case OMPD_distribute: 6961 case OMPD_distribute_simd: 6962 case OMPD_distribute_parallel_for: 6963 case OMPD_distribute_parallel_for_simd: 6964 case OMPD_teams_distribute: 6965 case OMPD_teams_distribute_simd: 6966 case OMPD_teams_distribute_parallel_for: 6967 case OMPD_teams_distribute_parallel_for_simd: 6968 case OMPD_target_update: 6969 case OMPD_declare_simd: 6970 case OMPD_declare_variant: 6971 case OMPD_begin_declare_variant: 6972 case OMPD_end_declare_variant: 6973 case OMPD_declare_target: 6974 case OMPD_end_declare_target: 6975 case OMPD_declare_reduction: 6976 case OMPD_declare_mapper: 6977 case OMPD_taskloop: 6978 case OMPD_taskloop_simd: 6979 case OMPD_master_taskloop: 6980 case OMPD_master_taskloop_simd: 6981 case 
OMPD_parallel_master_taskloop: 6982 case OMPD_parallel_master_taskloop_simd: 6983 case OMPD_requires: 6984 case OMPD_unknown: 6985 break; 6986 default: 6987 break; 6988 } 6989 llvm_unreachable("Unsupported directive kind."); 6990 } 6991 6992 namespace { 6993 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6994 6995 // Utility to handle information from clauses associated with a given 6996 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6997 // It provides a convenient interface to obtain the information and generate 6998 // code for that information. 6999 class MappableExprsHandler { 7000 public: 7001 /// Values for bit flags used to specify the mapping type for 7002 /// offloading. 7003 enum OpenMPOffloadMappingFlags : uint64_t { 7004 /// No flags 7005 OMP_MAP_NONE = 0x0, 7006 /// Allocate memory on the device and move data from host to device. 7007 OMP_MAP_TO = 0x01, 7008 /// Allocate memory on the device and move data from device to host. 7009 OMP_MAP_FROM = 0x02, 7010 /// Always perform the requested mapping action on the element, even 7011 /// if it was already mapped before. 7012 OMP_MAP_ALWAYS = 0x04, 7013 /// Delete the element from the device environment, ignoring the 7014 /// current reference count associated with the element. 7015 OMP_MAP_DELETE = 0x08, 7016 /// The element being mapped is a pointer-pointee pair; both the 7017 /// pointer and the pointee should be mapped. 7018 OMP_MAP_PTR_AND_OBJ = 0x10, 7019 /// This flags signals that the base address of an entry should be 7020 /// passed to the target kernel as an argument. 7021 OMP_MAP_TARGET_PARAM = 0x20, 7022 /// Signal that the runtime library has to return the device pointer 7023 /// in the current position for the data being mapped. Used when we have the 7024 /// use_device_ptr or use_device_addr clause. 7025 OMP_MAP_RETURN_PARAM = 0x40, 7026 /// This flag signals that the reference being passed is a pointer to 7027 /// private data. 
7028 OMP_MAP_PRIVATE = 0x80, 7029 /// Pass the element to the device by value. 7030 OMP_MAP_LITERAL = 0x100, 7031 /// Implicit map 7032 OMP_MAP_IMPLICIT = 0x200, 7033 /// Close is a hint to the runtime to allocate memory close to 7034 /// the target device. 7035 OMP_MAP_CLOSE = 0x400, 7036 /// 0x800 is reserved for compatibility with XLC. 7037 /// Produce a runtime error if the data is not already allocated. 7038 OMP_MAP_PRESENT = 0x1000, 7039 /// Signal that the runtime library should use args as an array of 7040 /// descriptor_dim pointers and use args_size as dims. Used when we have 7041 /// non-contiguous list items in target update directive 7042 OMP_MAP_NON_CONTIG = 0x100000000000, 7043 /// The 16 MSBs of the flags indicate whether the entry is member of some 7044 /// struct/class. 7045 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7046 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7047 }; 7048 7049 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7050 static unsigned getFlagMemberOffset() { 7051 unsigned Offset = 0; 7052 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7053 Remain = Remain >> 1) 7054 Offset++; 7055 return Offset; 7056 } 7057 7058 /// Class that holds debugging information for a data mapping to be passed to 7059 /// the runtime library. 7060 class MappingExprInfo { 7061 /// The variable declaration used for the data mapping. 7062 const ValueDecl *MapDecl = nullptr; 7063 /// The original expression used in the map clause, or null if there is 7064 /// none. 7065 const Expr *MapExpr = nullptr; 7066 7067 public: 7068 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7069 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7070 7071 const ValueDecl *getMapDecl() const { return MapDecl; } 7072 const Expr *getMapExpr() const { return MapExpr; } 7073 }; 7074 7075 /// Class that associates information with a base pointer to be passed to the 7076 /// runtime library. 
7077 class BasePointerInfo { 7078 /// The base pointer. 7079 llvm::Value *Ptr = nullptr; 7080 /// The base declaration that refers to this device pointer, or null if 7081 /// there is none. 7082 const ValueDecl *DevPtrDecl = nullptr; 7083 7084 public: 7085 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7086 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7087 llvm::Value *operator*() const { return Ptr; } 7088 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7089 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7090 }; 7091 7092 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7093 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7094 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7095 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7096 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7097 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7098 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7099 7100 /// This structure contains combined information generated for mappable 7101 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7102 /// mappers, and non-contiguous information. 7103 struct MapCombinedInfoTy { 7104 struct StructNonContiguousInfo { 7105 bool IsNonContiguous = false; 7106 MapDimArrayTy Dims; 7107 MapNonContiguousArrayTy Offsets; 7108 MapNonContiguousArrayTy Counts; 7109 MapNonContiguousArrayTy Strides; 7110 }; 7111 MapExprsArrayTy Exprs; 7112 MapBaseValuesArrayTy BasePointers; 7113 MapValuesArrayTy Pointers; 7114 MapValuesArrayTy Sizes; 7115 MapFlagsArrayTy Types; 7116 MapMappersArrayTy Mappers; 7117 StructNonContiguousInfo NonContigInfo; 7118 7119 /// Append arrays in \a CurInfo. 
7120 void append(MapCombinedInfoTy &CurInfo) { 7121 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7122 BasePointers.append(CurInfo.BasePointers.begin(), 7123 CurInfo.BasePointers.end()); 7124 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7125 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7126 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7127 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7128 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7129 CurInfo.NonContigInfo.Dims.end()); 7130 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7131 CurInfo.NonContigInfo.Offsets.end()); 7132 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7133 CurInfo.NonContigInfo.Counts.end()); 7134 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7135 CurInfo.NonContigInfo.Strides.end()); 7136 } 7137 }; 7138 7139 /// Map between a struct and the its lowest & highest elements which have been 7140 /// mapped. 7141 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7142 /// HE(FieldIndex, Pointer)} 7143 struct StructRangeInfoTy { 7144 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7145 0, Address::invalid()}; 7146 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7147 0, Address::invalid()}; 7148 Address Base = Address::invalid(); 7149 bool IsArraySection = false; 7150 }; 7151 7152 private: 7153 /// Kind that defines how a device pointer has to be returned. 
7154 struct MapInfo { 7155 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7156 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7157 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7158 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7159 bool ReturnDevicePointer = false; 7160 bool IsImplicit = false; 7161 const ValueDecl *Mapper = nullptr; 7162 const Expr *VarRef = nullptr; 7163 bool ForDeviceAddr = false; 7164 7165 MapInfo() = default; 7166 MapInfo( 7167 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7168 OpenMPMapClauseKind MapType, 7169 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7170 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7171 bool ReturnDevicePointer, bool IsImplicit, 7172 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7173 bool ForDeviceAddr = false) 7174 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7175 MotionModifiers(MotionModifiers), 7176 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7177 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7178 }; 7179 7180 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7181 /// member and there is no map information about it, then emission of that 7182 /// entry is deferred until the whole struct has been processed. 7183 struct DeferredDevicePtrEntryTy { 7184 const Expr *IE = nullptr; 7185 const ValueDecl *VD = nullptr; 7186 bool ForDeviceAddr = false; 7187 7188 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7189 bool ForDeviceAddr) 7190 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7191 }; 7192 7193 /// The target directive from where the mappable clauses were extracted. It 7194 /// is either a executable directive or a user-defined mapper directive. 7195 llvm::PointerUnion<const OMPExecutableDirective *, 7196 const OMPDeclareMapperDecl *> 7197 CurDir; 7198 7199 /// Function the directive is being generated for. 
7200 CodeGenFunction &CGF; 7201 7202 /// Set of all first private variables in the current directive. 7203 /// bool data is set to true if the variable is implicitly marked as 7204 /// firstprivate, false otherwise. 7205 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7206 7207 /// Map between device pointer declarations and their expression components. 7208 /// The key value for declarations in 'this' is null. 7209 llvm::DenseMap< 7210 const ValueDecl *, 7211 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7212 DevPointersMap; 7213 7214 llvm::Value *getExprTypeSize(const Expr *E) const { 7215 QualType ExprTy = E->getType().getCanonicalType(); 7216 7217 // Calculate the size for array shaping expression. 7218 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7219 llvm::Value *Size = 7220 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7221 for (const Expr *SE : OAE->getDimensions()) { 7222 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7223 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7224 CGF.getContext().getSizeType(), 7225 SE->getExprLoc()); 7226 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7227 } 7228 return Size; 7229 } 7230 7231 // Reference types are ignored for mapping purposes. 7232 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7233 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7234 7235 // Given that an array section is considered a built-in type, we need to 7236 // do the calculation based on the length of the section instead of relying 7237 // on CGF.getTypeSize(E->getType()). 7238 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7239 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7240 OAE->getBase()->IgnoreParenImpCasts()) 7241 .getCanonicalType(); 7242 7243 // If there is no length associated with the expression and lower bound is 7244 // not specified too, that means we are using the whole length of the 7245 // base. 
7246 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7247 !OAE->getLowerBound()) 7248 return CGF.getTypeSize(BaseTy); 7249 7250 llvm::Value *ElemSize; 7251 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7252 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7253 } else { 7254 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7255 assert(ATy && "Expecting array type if not a pointer type."); 7256 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7257 } 7258 7259 // If we don't have a length at this point, that is because we have an 7260 // array section with a single element. 7261 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7262 return ElemSize; 7263 7264 if (const Expr *LenExpr = OAE->getLength()) { 7265 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7266 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7267 CGF.getContext().getSizeType(), 7268 LenExpr->getExprLoc()); 7269 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7270 } 7271 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7272 OAE->getLowerBound() && "expected array_section[lb:]."); 7273 // Size = sizetype - lb * elemtype; 7274 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7275 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7276 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7277 CGF.getContext().getSizeType(), 7278 OAE->getLowerBound()->getExprLoc()); 7279 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7280 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7281 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7282 LengthVal = CGF.Builder.CreateSelect( 7283 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7284 return LengthVal; 7285 } 7286 return CGF.getTypeSize(ExprTy); 7287 } 7288 7289 /// Return the corresponding bits for a given map clause modifier. 
Add 7290 /// a flag marking the map as a pointer if requested. Add a flag marking the 7291 /// map as the first one of a series of maps that relate to the same map 7292 /// expression. 7293 OpenMPOffloadMappingFlags getMapTypeBits( 7294 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7295 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7296 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7297 OpenMPOffloadMappingFlags Bits = 7298 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7299 switch (MapType) { 7300 case OMPC_MAP_alloc: 7301 case OMPC_MAP_release: 7302 // alloc and release is the default behavior in the runtime library, i.e. 7303 // if we don't pass any bits alloc/release that is what the runtime is 7304 // going to do. Therefore, we don't need to signal anything for these two 7305 // type modifiers. 7306 break; 7307 case OMPC_MAP_to: 7308 Bits |= OMP_MAP_TO; 7309 break; 7310 case OMPC_MAP_from: 7311 Bits |= OMP_MAP_FROM; 7312 break; 7313 case OMPC_MAP_tofrom: 7314 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7315 break; 7316 case OMPC_MAP_delete: 7317 Bits |= OMP_MAP_DELETE; 7318 break; 7319 case OMPC_MAP_unknown: 7320 llvm_unreachable("Unexpected map type!"); 7321 } 7322 if (AddPtrFlag) 7323 Bits |= OMP_MAP_PTR_AND_OBJ; 7324 if (AddIsTargetParamFlag) 7325 Bits |= OMP_MAP_TARGET_PARAM; 7326 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7327 != MapModifiers.end()) 7328 Bits |= OMP_MAP_ALWAYS; 7329 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7330 != MapModifiers.end()) 7331 Bits |= OMP_MAP_CLOSE; 7332 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7333 != MapModifiers.end()) 7334 Bits |= OMP_MAP_PRESENT; 7335 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7336 != MotionModifiers.end()) 7337 Bits |= OMP_MAP_PRESENT; 7338 if (IsNonContiguous) 7339 Bits |= OMP_MAP_NON_CONTIG; 7340 return Bits; 7341 } 7342 7343 /// Return true if the provided expression is a 
final array section. A 7344 /// final array section, is one whose length can't be proved to be one. 7345 bool isFinalArraySectionExpression(const Expr *E) const { 7346 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7347 7348 // It is not an array section and therefore not a unity-size one. 7349 if (!OASE) 7350 return false; 7351 7352 // An array section with no colon always refer to a single element. 7353 if (OASE->getColonLocFirst().isInvalid()) 7354 return false; 7355 7356 const Expr *Length = OASE->getLength(); 7357 7358 // If we don't have a length we have to check if the array has size 1 7359 // for this dimension. Also, we should always expect a length if the 7360 // base type is pointer. 7361 if (!Length) { 7362 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7363 OASE->getBase()->IgnoreParenImpCasts()) 7364 .getCanonicalType(); 7365 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7366 return ATy->getSize().getSExtValue() != 1; 7367 // If we don't have a constant dimension length, we have to consider 7368 // the current section as having any size, so it is not necessarily 7369 // unitary. If it happen to be unity size, that's user fault. 7370 return true; 7371 } 7372 7373 // Check if the length evaluates to 1. 7374 Expr::EvalResult Result; 7375 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7376 return true; // Can have more that size 1. 7377 7378 llvm::APSInt ConstLength = Result.Val.getInt(); 7379 return ConstLength.getSExtValue() != 1; 7380 } 7381 7382 /// Generate the base pointers, section pointers, sizes, map type bits, and 7383 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7384 /// map type, map or motion modifiers, and expression components. 7385 /// \a IsFirstComponent should be set to true if the provided set of 7386 /// components is the first associated with a capture. 
7387 void generateInfoForComponentList( 7388 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7389 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7390 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7391 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7392 bool IsFirstComponentList, bool IsImplicit, 7393 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7394 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7395 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7396 OverlappedElements = llvm::None) const { 7397 // The following summarizes what has to be generated for each map and the 7398 // types below. The generated information is expressed in this order: 7399 // base pointer, section pointer, size, flags 7400 // (to add to the ones that come from the map type and modifier). 7401 // 7402 // double d; 7403 // int i[100]; 7404 // float *p; 7405 // 7406 // struct S1 { 7407 // int i; 7408 // float f[50]; 7409 // } 7410 // struct S2 { 7411 // int i; 7412 // float f[50]; 7413 // S1 s; 7414 // double *p; 7415 // struct S2 *ps; 7416 // } 7417 // S2 s; 7418 // S2 *ps; 7419 // 7420 // map(d) 7421 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7422 // 7423 // map(i) 7424 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7425 // 7426 // map(i[1:23]) 7427 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7428 // 7429 // map(p) 7430 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7431 // 7432 // map(p[1:24]) 7433 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7434 // in unified shared memory mode or for local pointers 7435 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7436 // 7437 // map(s) 7438 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7439 // 7440 // map(s.i) 7441 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7442 // 7443 // map(s.s.f) 7444 // &s, &(s.s.f[0]), 50*sizeof(float), 
TARGET_PARAM | TO | FROM 7445 // 7446 // map(s.p) 7447 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7448 // 7449 // map(to: s.p[:22]) 7450 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7451 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7452 // &(s.p), &(s.p[0]), 22*sizeof(double), 7453 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7454 // (*) alloc space for struct members, only this is a target parameter 7455 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7456 // optimizes this entry out, same in the examples below) 7457 // (***) map the pointee (map: to) 7458 // 7459 // map(s.ps) 7460 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7461 // 7462 // map(from: s.ps->s.i) 7463 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7464 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7465 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7466 // 7467 // map(to: s.ps->ps) 7468 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7469 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7470 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7471 // 7472 // map(s.ps->ps->ps) 7473 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7474 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7475 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7476 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7477 // 7478 // map(to: s.ps->ps->s.f[:22]) 7479 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7480 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7481 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7482 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7483 // 7484 // map(ps) 7485 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7486 // 7487 // map(ps->i) 7488 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7489 // 7490 // map(ps->s.f) 7491 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7492 // 7493 // map(from: ps->p) 7494 // ps, &(ps->p), sizeof(double*), 
TARGET_PARAM | FROM 7495 // 7496 // map(to: ps->p[:22]) 7497 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7498 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7499 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7500 // 7501 // map(ps->ps) 7502 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7503 // 7504 // map(from: ps->ps->s.i) 7505 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7506 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7507 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7508 // 7509 // map(from: ps->ps->ps) 7510 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7511 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7512 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7513 // 7514 // map(ps->ps->ps->ps) 7515 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7516 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7517 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7518 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7519 // 7520 // map(to: ps->ps->ps->s.f[:22]) 7521 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7522 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7523 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7524 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7525 // 7526 // map(to: s.f[:22]) map(from: s.p[:33]) 7527 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7528 // sizeof(double*) (**), TARGET_PARAM 7529 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7530 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7531 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7532 // (*) allocate contiguous space needed to fit all mapped members even if 7533 // we allocate space for members not mapped (in this example, 7534 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7535 // them as well because they fall between &s.f[0] and &s.p) 7536 // 7537 
// map(from: s.f[:22]) map(to: ps->p[:33]) 7538 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7539 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7540 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7541 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7542 // (*) the struct this entry pertains to is the 2nd element in the list of 7543 // arguments, hence MEMBER_OF(2) 7544 // 7545 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7546 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7547 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7548 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7549 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7550 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7551 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7552 // (*) the struct this entry pertains to is the 4th element in the list 7553 // of arguments, hence MEMBER_OF(4) 7554 7555 // Track if the map information being generated is the first for a capture. 7556 bool IsCaptureFirstInfo = IsFirstComponentList; 7557 // When the variable is on a declare target link or in a to clause with 7558 // unified memory, a reference is needed to hold the host/device address 7559 // of the variable. 7560 bool RequiresReference = false; 7561 7562 // Scan the components from the base to the complete expression. 7563 auto CI = Components.rbegin(); 7564 auto CE = Components.rend(); 7565 auto I = CI; 7566 7567 // Track if the map information being generated is the first for a list of 7568 // components. 
7569 bool IsExpressionFirstInfo = true; 7570 bool FirstPointerInComplexData = false; 7571 Address BP = Address::invalid(); 7572 const Expr *AssocExpr = I->getAssociatedExpression(); 7573 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7574 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7575 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7576 7577 if (isa<MemberExpr>(AssocExpr)) { 7578 // The base is the 'this' pointer. The content of the pointer is going 7579 // to be the base of the field being mapped. 7580 BP = CGF.LoadCXXThisAddress(); 7581 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7582 (OASE && 7583 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7584 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7585 } else if (OAShE && 7586 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7587 BP = Address( 7588 CGF.EmitScalarExpr(OAShE->getBase()), 7589 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7590 } else { 7591 // The base is the reference to the variable. 7592 // BP = &Var. 7593 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7594 if (const auto *VD = 7595 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7596 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7597 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7598 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7599 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7600 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7601 RequiresReference = true; 7602 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7603 } 7604 } 7605 } 7606 7607 // If the variable is a pointer and is being dereferenced (i.e. is not 7608 // the last component), the base has to be the pointer itself, not its 7609 // reference. References are ignored for mapping purposes. 
7610 QualType Ty = 7611 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7612 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7613 // No need to generate individual map information for the pointer, it 7614 // can be associated with the combined storage if shared memory mode is 7615 // active or the base declaration is not global variable. 7616 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7617 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7618 !VD || VD->hasLocalStorage()) 7619 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7620 else 7621 FirstPointerInComplexData = true; 7622 ++I; 7623 } 7624 } 7625 7626 // Track whether a component of the list should be marked as MEMBER_OF some 7627 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7628 // in a component list should be marked as MEMBER_OF, all subsequent entries 7629 // do not belong to the base struct. E.g. 7630 // struct S2 s; 7631 // s.ps->ps->ps->f[:] 7632 // (1) (2) (3) (4) 7633 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7634 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7635 // is the pointee of ps(2) which is not member of struct s, so it should not 7636 // be marked as such (it is still PTR_AND_OBJ). 7637 // The variable is initialized to false so that PTR_AND_OBJ entries which 7638 // are not struct members are not considered (e.g. array of pointers to 7639 // data). 7640 bool ShouldBeMemberOf = false; 7641 7642 // Variable keeping track of whether or not we have encountered a component 7643 // in the component list which is a member expression. Useful when we have a 7644 // pointer or a final array section, in which case it is the previous 7645 // component in the list which tells us whether we have a member expression. 7646 // E.g. 
X.f[:] 7647 // While processing the final array section "[:]" it is "f" which tells us 7648 // whether we are dealing with a member of a declared struct. 7649 const MemberExpr *EncounteredME = nullptr; 7650 7651 // Track for the total number of dimension. Start from one for the dummy 7652 // dimension. 7653 uint64_t DimSize = 1; 7654 7655 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7656 7657 for (; I != CE; ++I) { 7658 // If the current component is member of a struct (parent struct) mark it. 7659 if (!EncounteredME) { 7660 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7661 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7662 // as MEMBER_OF the parent struct. 7663 if (EncounteredME) { 7664 ShouldBeMemberOf = true; 7665 // Do not emit as complex pointer if this is actually not array-like 7666 // expression. 7667 if (FirstPointerInComplexData) { 7668 QualType Ty = std::prev(I) 7669 ->getAssociatedDeclaration() 7670 ->getType() 7671 .getNonReferenceType(); 7672 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7673 FirstPointerInComplexData = false; 7674 } 7675 } 7676 } 7677 7678 auto Next = std::next(I); 7679 7680 // We need to generate the addresses and sizes if this is the last 7681 // component, if the component is a pointer or if it is an array section 7682 // whose length can't be proved to be one. If this is a pointer, it 7683 // becomes the base address for the following components. 7684 7685 // A final array section, is one whose length can't be proved to be one. 7686 // If the map item is non-contiguous then we don't treat any array section 7687 // as final array section. 7688 bool IsFinalArraySection = 7689 !IsNonContiguous && 7690 isFinalArraySectionExpression(I->getAssociatedExpression()); 7691 7692 // If we have a declaration for the mapping use that, otherwise use 7693 // the base declaration of the map clause. 
7694 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7695 ? I->getAssociatedDeclaration() 7696 : BaseDecl; 7697 7698 // Get information on whether the element is a pointer. Have to do a 7699 // special treatment for array sections given that they are built-in 7700 // types. 7701 const auto *OASE = 7702 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7703 const auto *OAShE = 7704 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7705 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7706 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7707 bool IsPointer = 7708 OAShE || 7709 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7710 .getCanonicalType() 7711 ->isAnyPointerType()) || 7712 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7713 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7714 7715 if (OASE) 7716 ++DimSize; 7717 7718 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7719 // If this is not the last component, we expect the pointer to be 7720 // associated with an array expression or member expression. 
7721 assert((Next == CE || 7722 isa<MemberExpr>(Next->getAssociatedExpression()) || 7723 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7724 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7725 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7726 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7727 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7728 "Unexpected expression"); 7729 7730 Address LB = Address::invalid(); 7731 if (OAShE) { 7732 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7733 CGF.getContext().getTypeAlignInChars( 7734 OAShE->getBase()->getType())); 7735 } else { 7736 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7737 .getAddress(CGF); 7738 } 7739 7740 // If this component is a pointer inside the base struct then we don't 7741 // need to create any entry for it - it will be combined with the object 7742 // it is pointing to into a single PTR_AND_OBJ entry. 7743 bool IsMemberPointerOrAddr = 7744 (IsPointer || ForDeviceAddr) && EncounteredME && 7745 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7746 EncounteredME); 7747 if (!OverlappedElements.empty()) { 7748 // Handle base element with the info for overlapped elements. 7749 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7750 assert(Next == CE && 7751 "Expected last element for the overlapped elements."); 7752 assert(!IsPointer && 7753 "Unexpected base element with the pointer type."); 7754 // Mark the whole struct as the struct that requires allocation on the 7755 // device. 
7756 PartialStruct.LowestElem = {0, LB}; 7757 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7758 I->getAssociatedExpression()->getType()); 7759 Address HB = CGF.Builder.CreateConstGEP( 7760 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7761 CGF.VoidPtrTy), 7762 TypeSize.getQuantity() - 1); 7763 PartialStruct.HighestElem = { 7764 std::numeric_limits<decltype( 7765 PartialStruct.HighestElem.first)>::max(), 7766 HB}; 7767 PartialStruct.Base = BP; 7768 // Emit data for non-overlapped data. 7769 OpenMPOffloadMappingFlags Flags = 7770 OMP_MAP_MEMBER_OF | 7771 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7772 /*AddPtrFlag=*/false, 7773 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7774 LB = BP; 7775 llvm::Value *Size = nullptr; 7776 // Do bitcopy of all non-overlapped structure elements. 7777 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7778 Component : OverlappedElements) { 7779 Address ComponentLB = Address::invalid(); 7780 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7781 Component) { 7782 if (MC.getAssociatedDeclaration()) { 7783 ComponentLB = 7784 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7785 .getAddress(CGF); 7786 Size = CGF.Builder.CreatePtrDiff( 7787 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7788 CGF.EmitCastToVoidPtr(LB.getPointer())); 7789 break; 7790 } 7791 } 7792 assert(Size && "Failed to determine structure size"); 7793 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7794 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7795 CombinedInfo.Pointers.push_back(LB.getPointer()); 7796 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7797 Size, CGF.Int64Ty, /*isSigned=*/true)); 7798 CombinedInfo.Types.push_back(Flags); 7799 CombinedInfo.Mappers.push_back(nullptr); 7800 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7801 : 1); 7802 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7803 } 7804 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7805 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7806 CombinedInfo.Pointers.push_back(LB.getPointer()); 7807 Size = CGF.Builder.CreatePtrDiff( 7808 CGF.EmitCastToVoidPtr( 7809 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7810 CGF.EmitCastToVoidPtr(LB.getPointer())); 7811 CombinedInfo.Sizes.push_back( 7812 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7813 CombinedInfo.Types.push_back(Flags); 7814 CombinedInfo.Mappers.push_back(nullptr); 7815 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7816 : 1); 7817 break; 7818 } 7819 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7820 if (!IsMemberPointerOrAddr || 7821 (Next == CE && MapType != OMPC_MAP_unknown)) { 7822 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7823 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7824 CombinedInfo.Pointers.push_back(LB.getPointer()); 7825 CombinedInfo.Sizes.push_back( 7826 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7827 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7828 : 1); 7829 7830 // If Mapper is valid, the last component inherits the mapper. 7831 bool HasMapper = Mapper && Next == CE; 7832 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7833 7834 // We need to add a pointer flag for each map that comes from the 7835 // same expression except for the first one. We also need to signal 7836 // this map is the first one that relates with the current capture 7837 // (there is a set of entries for each capture). 
7838 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7839 MapType, MapModifiers, MotionModifiers, IsImplicit, 7840 !IsExpressionFirstInfo || RequiresReference || 7841 FirstPointerInComplexData, 7842 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7843 7844 if (!IsExpressionFirstInfo) { 7845 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7846 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7847 if (IsPointer) 7848 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7849 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7850 7851 if (ShouldBeMemberOf) { 7852 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7853 // should be later updated with the correct value of MEMBER_OF. 7854 Flags |= OMP_MAP_MEMBER_OF; 7855 // From now on, all subsequent PTR_AND_OBJ entries should not be 7856 // marked as MEMBER_OF. 7857 ShouldBeMemberOf = false; 7858 } 7859 } 7860 7861 CombinedInfo.Types.push_back(Flags); 7862 } 7863 7864 // If we have encountered a member expression so far, keep track of the 7865 // mapped member. If the parent is "*this", then the value declaration 7866 // is nullptr. 
7867 if (EncounteredME) { 7868 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7869 unsigned FieldIndex = FD->getFieldIndex(); 7870 7871 // Update info about the lowest and highest elements for this struct 7872 if (!PartialStruct.Base.isValid()) { 7873 PartialStruct.LowestElem = {FieldIndex, LB}; 7874 if (IsFinalArraySection) { 7875 Address HB = 7876 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7877 .getAddress(CGF); 7878 PartialStruct.HighestElem = {FieldIndex, HB}; 7879 } else { 7880 PartialStruct.HighestElem = {FieldIndex, LB}; 7881 } 7882 PartialStruct.Base = BP; 7883 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7884 PartialStruct.LowestElem = {FieldIndex, LB}; 7885 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7886 PartialStruct.HighestElem = {FieldIndex, LB}; 7887 } 7888 } 7889 7890 // Need to emit combined struct for array sections. 7891 if (IsFinalArraySection || IsNonContiguous) 7892 PartialStruct.IsArraySection = true; 7893 7894 // If we have a final array section, we are done with this expression. 7895 if (IsFinalArraySection) 7896 break; 7897 7898 // The pointer becomes the base for the next element. 
7899 if (Next != CE) 7900 BP = LB; 7901 7902 IsExpressionFirstInfo = false; 7903 IsCaptureFirstInfo = false; 7904 FirstPointerInComplexData = false; 7905 } else if (FirstPointerInComplexData) { 7906 QualType Ty = Components.rbegin() 7907 ->getAssociatedDeclaration() 7908 ->getType() 7909 .getNonReferenceType(); 7910 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7911 FirstPointerInComplexData = false; 7912 } 7913 } 7914 7915 if (!IsNonContiguous) 7916 return; 7917 7918 const ASTContext &Context = CGF.getContext(); 7919 7920 // For supporting stride in array section, we need to initialize the first 7921 // dimension size as 1, first offset as 0, and first count as 1 7922 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7923 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7924 MapValuesArrayTy CurStrides; 7925 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7926 uint64_t ElementTypeSize; 7927 7928 // Collect Size information for each dimension and get the element size as 7929 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 7930 // should be [10, 10] and the first stride is 4 btyes. 7931 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7932 Components) { 7933 const Expr *AssocExpr = Component.getAssociatedExpression(); 7934 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7935 7936 if (!OASE) 7937 continue; 7938 7939 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7940 auto *CAT = Context.getAsConstantArrayType(Ty); 7941 auto *VAT = Context.getAsVariableArrayType(Ty); 7942 7943 // We need all the dimension size except for the last dimension. 7944 assert((VAT || CAT || &Component == &*Components.begin()) && 7945 "Should be either ConstantArray or VariableArray if not the " 7946 "first Component"); 7947 7948 // Get element size if CurStrides is empty. 
7949 if (CurStrides.empty()) { 7950 const Type *ElementType = nullptr; 7951 if (CAT) 7952 ElementType = CAT->getElementType().getTypePtr(); 7953 else if (VAT) 7954 ElementType = VAT->getElementType().getTypePtr(); 7955 else 7956 assert(&Component == &*Components.begin() && 7957 "Only expect pointer (non CAT or VAT) when this is the " 7958 "first Component"); 7959 // If ElementType is null, then it means the base is a pointer 7960 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7961 // for next iteration. 7962 if (ElementType) { 7963 // For the case that having pointer as base, we need to remove one 7964 // level of indirection. 7965 if (&Component != &*Components.begin()) 7966 ElementType = ElementType->getPointeeOrArrayElementType(); 7967 ElementTypeSize = 7968 Context.getTypeSizeInChars(ElementType).getQuantity(); 7969 CurStrides.push_back( 7970 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7971 } 7972 } 7973 // Get dimension value except for the last dimension since we don't need 7974 // it. 7975 if (DimSizes.size() < Components.size() - 1) { 7976 if (CAT) 7977 DimSizes.push_back(llvm::ConstantInt::get( 7978 CGF.Int64Ty, CAT->getSize().getZExtValue())); 7979 else if (VAT) 7980 DimSizes.push_back(CGF.Builder.CreateIntCast( 7981 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 7982 /*IsSigned=*/false)); 7983 } 7984 } 7985 7986 // Skip the dummy dimension since we have already have its information. 7987 auto DI = DimSizes.begin() + 1; 7988 // Product of dimension. 7989 llvm::Value *DimProd = 7990 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 7991 7992 // Collect info for non-contiguous. Notice that offset, count, and stride 7993 // are only meaningful for array-section, so we insert a null for anything 7994 // other than array-section. 7995 // Also, the size of offset, count, and stride are not the same as 7996 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 7997 // count, and stride are the same as the number of non-contiguous 7998 // declaration in target update to/from clause. 7999 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8000 Components) { 8001 const Expr *AssocExpr = Component.getAssociatedExpression(); 8002 8003 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8004 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8005 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8006 /*isSigned=*/false); 8007 CurOffsets.push_back(Offset); 8008 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8009 CurStrides.push_back(CurStrides.back()); 8010 continue; 8011 } 8012 8013 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8014 8015 if (!OASE) 8016 continue; 8017 8018 // Offset 8019 const Expr *OffsetExpr = OASE->getLowerBound(); 8020 llvm::Value *Offset = nullptr; 8021 if (!OffsetExpr) { 8022 // If offset is absent, then we just set it to zero. 8023 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8024 } else { 8025 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8026 CGF.Int64Ty, 8027 /*isSigned=*/false); 8028 } 8029 CurOffsets.push_back(Offset); 8030 8031 // Count 8032 const Expr *CountExpr = OASE->getLength(); 8033 llvm::Value *Count = nullptr; 8034 if (!CountExpr) { 8035 // In Clang, once a high dimension is an array section, we construct all 8036 // the lower dimension as array section, however, for case like 8037 // arr[0:2][2], Clang construct the inner dimension as an array section 8038 // but it actually is not in an array section form according to spec. 8039 if (!OASE->getColonLocFirst().isValid() && 8040 !OASE->getColonLocSecond().isValid()) { 8041 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8042 } else { 8043 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8044 // When the length is absent it defaults to ⌈(size − 8045 // lower-bound)/stride⌉, where size is the size of the array 8046 // dimension. 8047 const Expr *StrideExpr = OASE->getStride(); 8048 llvm::Value *Stride = 8049 StrideExpr 8050 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8051 CGF.Int64Ty, /*isSigned=*/false) 8052 : nullptr; 8053 if (Stride) 8054 Count = CGF.Builder.CreateUDiv( 8055 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8056 else 8057 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8058 } 8059 } else { 8060 Count = CGF.EmitScalarExpr(CountExpr); 8061 } 8062 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8063 CurCounts.push_back(Count); 8064 8065 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8066 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8067 // Offset Count Stride 8068 // D0 0 1 4 (int) <- dummy dimension 8069 // D1 0 2 8 (2 * (1) * 4) 8070 // D2 1 2 20 (1 * (1 * 5) * 4) 8071 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8072 const Expr *StrideExpr = OASE->getStride(); 8073 llvm::Value *Stride = 8074 StrideExpr 8075 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8076 CGF.Int64Ty, /*isSigned=*/false) 8077 : nullptr; 8078 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8079 if (Stride) 8080 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8081 else 8082 CurStrides.push_back(DimProd); 8083 if (DI != DimSizes.end()) 8084 ++DI; 8085 } 8086 8087 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8088 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8089 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8090 } 8091 8092 /// Return the adjusted map modifiers if the declaration a capture refers to 8093 /// appears in a first-private clause. This is expected to be used only with 8094 /// directives that start with 'target'. 
8095 MappableExprsHandler::OpenMPOffloadMappingFlags 8096 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8097 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8098 8099 // A first private variable captured by reference will use only the 8100 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8101 // declaration is known as first-private in this handler. 8102 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8103 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8104 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8105 return MappableExprsHandler::OMP_MAP_ALWAYS | 8106 MappableExprsHandler::OMP_MAP_TO; 8107 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8108 return MappableExprsHandler::OMP_MAP_TO | 8109 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8110 return MappableExprsHandler::OMP_MAP_PRIVATE | 8111 MappableExprsHandler::OMP_MAP_TO; 8112 } 8113 return MappableExprsHandler::OMP_MAP_TO | 8114 MappableExprsHandler::OMP_MAP_FROM; 8115 } 8116 8117 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8118 // Rotate by getFlagMemberOffset() bits. 8119 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8120 << getFlagMemberOffset()); 8121 } 8122 8123 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8124 OpenMPOffloadMappingFlags MemberOfFlag) { 8125 // If the entry is PTR_AND_OBJ but has not been marked with the special 8126 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8127 // marked as MEMBER_OF. 8128 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8129 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8130 return; 8131 8132 // Reset the placeholder value to prepare the flag for the assignment of the 8133 // proper MEMBER_OF value. 
8134 Flags &= ~OMP_MAP_MEMBER_OF; 8135 Flags |= MemberOfFlag; 8136 } 8137 8138 void getPlainLayout(const CXXRecordDecl *RD, 8139 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8140 bool AsBase) const { 8141 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8142 8143 llvm::StructType *St = 8144 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8145 8146 unsigned NumElements = St->getNumElements(); 8147 llvm::SmallVector< 8148 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8149 RecordLayout(NumElements); 8150 8151 // Fill bases. 8152 for (const auto &I : RD->bases()) { 8153 if (I.isVirtual()) 8154 continue; 8155 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8156 // Ignore empty bases. 8157 if (Base->isEmpty() || CGF.getContext() 8158 .getASTRecordLayout(Base) 8159 .getNonVirtualSize() 8160 .isZero()) 8161 continue; 8162 8163 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8164 RecordLayout[FieldIndex] = Base; 8165 } 8166 // Fill in virtual bases. 8167 for (const auto &I : RD->vbases()) { 8168 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8169 // Ignore empty bases. 8170 if (Base->isEmpty()) 8171 continue; 8172 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8173 if (RecordLayout[FieldIndex]) 8174 continue; 8175 RecordLayout[FieldIndex] = Base; 8176 } 8177 // Fill in all the fields. 8178 assert(!RD->isUnion() && "Unexpected union."); 8179 for (const auto *Field : RD->fields()) { 8180 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8181 // will fill in later.) 
8182 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8183 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8184 RecordLayout[FieldIndex] = Field; 8185 } 8186 } 8187 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8188 &Data : RecordLayout) { 8189 if (Data.isNull()) 8190 continue; 8191 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8192 getPlainLayout(Base, Layout, /*AsBase=*/true); 8193 else 8194 Layout.push_back(Data.get<const FieldDecl *>()); 8195 } 8196 } 8197 8198 public: 8199 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8200 : CurDir(&Dir), CGF(CGF) { 8201 // Extract firstprivate clause information. 8202 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8203 for (const auto *D : C->varlists()) 8204 FirstPrivateDecls.try_emplace( 8205 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8206 // Extract implicit firstprivates from uses_allocators clauses. 8207 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8208 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8209 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8210 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8211 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8212 /*Implicit=*/true); 8213 else if (const auto *VD = dyn_cast<VarDecl>( 8214 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8215 ->getDecl())) 8216 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8217 } 8218 } 8219 // Extract device pointer clause information. 8220 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8221 for (auto L : C->component_lists()) 8222 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8223 } 8224 8225 /// Constructor for the declare mapper directive. 
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
///
/// \param CombinedInfo    Receives the combined entry: one element is pushed
///                        to each of Exprs, BasePointers, Pointers, Mappers,
///                        Sizes and Types.
/// \param CurTypes        Map flags of the member entries. Updated in place:
///                        TARGET_PARAM is cleared on the first element and
///                        the MEMBER_OF field of the elements is rewritten
///                        via setCorrectMemberOfFlag.
/// \param PartialStruct   Supplies the base address and the lowest/highest
///                        mapped elements of the struct.
/// \param VD              Declaration recorded in CombinedInfo.Exprs for the
///                        combined entry (may be null).
/// \param NotTargetParams If true the combined entry starts from
///                        OMP_MAP_NONE, otherwise from OMP_MAP_TARGET_PARAM.
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                       MapFlagsArrayTy &CurTypes,
                       const StructRangeInfoTy &PartialStruct,
                       const ValueDecl *VD = nullptr,
                       bool NotTargetParams = true) const {
  // Nothing to combine when there is a single entry whose MEMBER_OF field is
  // not the placeholder (all MEMBER_OF bits set) and the partial struct is
  // not an array section.
  if (CurTypes.size() == 1 &&
      ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
      !PartialStruct.IsArraySection)
    return;
  CombinedInfo.Exprs.push_back(VD);
  // Base is the base of the struct
  CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
  // Pointer is the address of the lowest element
  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
  CombinedInfo.Pointers.push_back(LB);
  // There should not be a mapper for a combined entry.
  CombinedInfo.Mappers.push_back(nullptr);
  // Size is (addr of {highest+1} element) - (addr of lowest element)
  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
  // Step one element past the highest element, then take the difference of
  // both addresses after casting them to void pointers.
  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                /*isSigned=*/false);
  CombinedInfo.Sizes.push_back(Size);
  // The combined entry is TARGET_PARAM only when NotTargetParams is false;
  // otherwise it starts with no flags.
  CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                               : OMP_MAP_TARGET_PARAM);
  // If any element has the present modifier, then make sure the runtime
  // doesn't attempt to allocate the struct.
  if (CurTypes.end() !=
      llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
        return Type & OMP_MAP_PRESENT;
      }))
    CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
  // Remove TARGET_PARAM flag from the first element if any.
  if (!CurTypes.empty())
    CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;

  // All other current entries will be MEMBER_OF the combined entry
  // (except for PTR_AND_OBJ entries which do not have a placeholder value
  // 0xFFFF in the MEMBER_OF field).
  // The combined entry is the last one pushed to CombinedInfo.BasePointers,
  // hence the "size() - 1" position.
  OpenMPOffloadMappingFlags MemberOfFlag =
      getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
  for (auto &M : CurTypes)
    setCorrectMemberOfFlag(M, MemberOfFlag);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates with a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
///
/// Clauses of the current executable directive are processed in this order:
/// map, to, from, use_device_ptr, use_device_addr.
///
/// \param CombinedInfo Output; entries are appended.
/// \param SkipVarSet   Declarations for which no map information is recorded
///                     by the InfoGen helper below.
void generateAllInfo(
    MapCombinedInfoTy &CombinedInfo,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses.
  // The entry is keyed by the canonical declaration of D (or null when D is
  // null) and dropped when that key is in SkipVarSet.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
        const ValueDecl *VD =
            D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
        if (SkipVarSet.count(VD))
          return;
        Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                              ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                              ForDeviceAddr);
      };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // map clauses. EI walks the clause's variable references in step with its
  // component lists.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      // The Expression is not correct if the mapping is implicit
      // (the check actually performed is whether the clause has a valid map
      // location).
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              E);
      ++EI;
    }
  }
  // to clauses are recorded with map type 'to' and their motion modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }
  // from clauses are recorded with map type 'from' and their motion
  // modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  // Note: the entry created below carries only the OMP_MAP_RETURN_PARAM flag
  // and a zero size.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  // use_device_ptr entries are collected separately and appended to
  // CombinedInfo at the very end of this function.
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        // Exclude cases where the base pointer is mapped as array subscript,
        // array section or array shaping. The base address is passed as a
        // pointer to base in this case and cannot be used as a base for
        // use_device_ptr list item.
        if (CI != It->second.end()) {
          // PrevCI is the component that follows the last one when walking
          // the list in reverse; it equals rend() for a single-component
          // list.
          auto PrevCI = std::next(CI->Components.rbegin());
          const auto *VarD = dyn_cast<VarDecl>(VD);
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              isa<MemberExpr>(IE) ||
              !VD->getType().getNonReferenceType()->isPointerType() ||
              PrevCI == CI->Components.rend() ||
              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
              VarD->hasLocalStorage()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // The loaded pointer value is used as both base pointer and pointer.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.Exprs.push_back(VD);
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  // Each declaration is handled at most once across all use_device_addr
  // clauses; Processed tracks the ones already seen.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Unlike use_device_ptr above, the entry is pushed straight into
        // CombinedInfo, i.e. ahead of the entries generated from Info below.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Generate the entries declaration by declaration, in the insertion order
  // of Info.
  for (const auto &M : Info) {
    // Underlying variable declaration used in the map clause.
    const ValueDecl *VD = std::get<0>(M);

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
      CurInfo.NonContigInfo.IsNonContiguous =
          L.Components.back().isNonContiguous();
      // IsFirstComponentList is always passed as false here.
      generateInfoForComponentList(
          L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
          PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit,
          L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      // The flag goes on the first entry generated for this component list.
      if (L.ReturnDevicePointer) {
        assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
            RelevantVD);
        CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          // use_device_addr: the address itself is both base and pointer.
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          // use_device_ptr: base is the address of the member, pointer is
          // the value loaded from it.
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    // The combined entry is pushed to CombinedInfo before CurInfo is
    // appended, so it precedes its members.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted map clauses of user-defined mapper (all included
/// in \a CombinedInfo).
///
/// Requires the handler to have been constructed for a declare mapper
/// directive (asserted below).
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Fill the information map for map clauses.
  // Every clause of the mapper is cast to OMPMapClause.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    const auto *EI = MC->getVarRefs().begin();
    for (const auto L : MC->component_lists()) {
      // The Expression is not correct if the mapping is implicit
      // (the check actually performed is whether the clause has a valid map
      // location).
      const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
      const ValueDecl *VD =
          std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                         : nullptr;
      // Get the corresponding user-defined mapper.
      Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                            MC->getMapTypeModifiers(), llvm::None,
                            /*ReturnDevicePointer=*/false, MC->isImplicit(),
                            std::get<2>(L), E);
      ++EI;
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Underlying variable declaration used in the map clause.
    const ValueDecl *VD = std::get<0>(M);

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(
          L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
          PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
          L.ForDeviceAddr, VD, L.VarRef);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      // A zero dimension count is recorded alongside the combined entry.
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
}

/// Emit capture info for lambdas for variables captured by reference.
// Does nothing unless the (canonical, non-reference) type of VD is a lambda
// class. Otherwise one entry is appended to CombinedInfo for the captured
// 'this' (if any) and one for each variable captured by reference or
// captured by copy with pointer type. For every such entry the address of
// the capture field is recorded in LambdaPointers, mapped to the address of
// the lambda object built from Arg.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Treat Arg as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Entry for the captured 'this': base is the field lvalue obtained via
    // EmitLValueForFieldInitialization, pointer is the one obtained via
    // EmitLValueForField, size is that of a void pointer.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // MEMBER_OF is set to the placeholder here; it is this exact flag
    // combination that adjustMemberOfForLambdaCaptures looks for.
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD shadows the function parameter for the rest of the loop
    // body.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captured pointers get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the captured variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Captured pointer: the pointer is the value loaded from the field and
      // the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose flags are exactly PTR_AND_OBJ | LITERAL | MEMBER_OF |
/// IMPLICIT, look up the lambda address recorded in \p LambdaPointers for its
/// base pointer, find the closest preceding entry whose pointer equals that
/// address, and store that entry's index in the MEMBER_OF field.
///
/// \param LambdaPointers Map from capture-field address to lambda address.
/// \param BasePointers   Base pointers of all entries (read only here).
/// \param Pointers       Pointers of all entries (read only here).
/// \param Types          Map flags of all entries; updated in place.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Scan backwards over the entries before I; J is kept one past the index
    // being examined so the unsigned counter never wraps.
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we generating information for the first component
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
8760 if (DevPointersMap.count(VD)) { 8761 CombinedInfo.Exprs.push_back(VD); 8762 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8763 CombinedInfo.Pointers.push_back(Arg); 8764 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8765 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8766 /*isSigned=*/true)); 8767 CombinedInfo.Types.push_back( 8768 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 8769 OMP_MAP_TARGET_PARAM); 8770 CombinedInfo.Mappers.push_back(nullptr); 8771 return; 8772 } 8773 8774 using MapData = 8775 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8776 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8777 const ValueDecl *, const Expr *>; 8778 SmallVector<MapData, 4> DeclComponentLists; 8779 assert(CurDir.is<const OMPExecutableDirective *>() && 8780 "Expect a executable directive"); 8781 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8782 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8783 const auto *EI = C->getVarRefs().begin(); 8784 for (const auto L : C->decl_component_lists(VD)) { 8785 const ValueDecl *VDecl, *Mapper; 8786 // The Expression is not correct if the mapping is implicit 8787 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 8788 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8789 std::tie(VDecl, Components, Mapper) = L; 8790 assert(VDecl == VD && "We got information for the wrong declaration??"); 8791 assert(!Components.empty() && 8792 "Not expecting declaration with no component lists."); 8793 DeclComponentLists.emplace_back(Components, C->getMapType(), 8794 C->getMapTypeModifiers(), 8795 C->isImplicit(), Mapper, E); 8796 ++EI; 8797 } 8798 } 8799 8800 // Find overlapping elements (including the offset from the base element). 
8801 llvm::SmallDenseMap< 8802 const MapData *, 8803 llvm::SmallVector< 8804 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8805 4> 8806 OverlappedData; 8807 size_t Count = 0; 8808 for (const MapData &L : DeclComponentLists) { 8809 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8810 OpenMPMapClauseKind MapType; 8811 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8812 bool IsImplicit; 8813 const ValueDecl *Mapper; 8814 const Expr *VarRef; 8815 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8816 L; 8817 ++Count; 8818 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8819 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8820 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 8821 VarRef) = L1; 8822 auto CI = Components.rbegin(); 8823 auto CE = Components.rend(); 8824 auto SI = Components1.rbegin(); 8825 auto SE = Components1.rend(); 8826 for (; CI != CE && SI != SE; ++CI, ++SI) { 8827 if (CI->getAssociatedExpression()->getStmtClass() != 8828 SI->getAssociatedExpression()->getStmtClass()) 8829 break; 8830 // Are we dealing with different variables/fields? 8831 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8832 break; 8833 } 8834 // Found overlapping if, at least for one component, reached the head of 8835 // the components list. 8836 if (CI == CE || SI == SE) { 8837 assert((CI != CE || SI != SE) && 8838 "Unexpected full match of the mapping components."); 8839 const MapData &BaseData = CI == CE ? L : L1; 8840 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8841 SI == SE ? Components : Components1; 8842 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8843 OverlappedElements.getSecond().push_back(SubData); 8844 } 8845 } 8846 } 8847 // Sort the overlapped elements for each item. 
8848 llvm::SmallVector<const FieldDecl *, 4> Layout; 8849 if (!OverlappedData.empty()) { 8850 if (const auto *CRD = 8851 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8852 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8853 else { 8854 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8855 Layout.append(RD->field_begin(), RD->field_end()); 8856 } 8857 } 8858 for (auto &Pair : OverlappedData) { 8859 llvm::sort( 8860 Pair.getSecond(), 8861 [&Layout]( 8862 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8863 OMPClauseMappableExprCommon::MappableExprComponentListRef 8864 Second) { 8865 auto CI = First.rbegin(); 8866 auto CE = First.rend(); 8867 auto SI = Second.rbegin(); 8868 auto SE = Second.rend(); 8869 for (; CI != CE && SI != SE; ++CI, ++SI) { 8870 if (CI->getAssociatedExpression()->getStmtClass() != 8871 SI->getAssociatedExpression()->getStmtClass()) 8872 break; 8873 // Are we dealing with different variables/fields? 8874 if (CI->getAssociatedDeclaration() != 8875 SI->getAssociatedDeclaration()) 8876 break; 8877 } 8878 8879 // Lists contain the same elements. 8880 if (CI == CE && SI == SE) 8881 return false; 8882 8883 // List with less elements is less than list with more elements. 8884 if (CI == CE || SI == SE) 8885 return CI == CE; 8886 8887 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8888 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8889 if (FD1->getParent() == FD2->getParent()) 8890 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8891 const auto It = 8892 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8893 return FD == FD1 || FD == FD2; 8894 }); 8895 return *It == FD1; 8896 }); 8897 } 8898 8899 // Associated with a capture, because the mapping flags depend on it. 8900 // Go through all of the elements with the overlapped elements. 
8901 for (const auto &Pair : OverlappedData) { 8902 const MapData &L = *Pair.getFirst(); 8903 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8904 OpenMPMapClauseKind MapType; 8905 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8906 bool IsImplicit; 8907 const ValueDecl *Mapper; 8908 const Expr *VarRef; 8909 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8910 L; 8911 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8912 OverlappedComponents = Pair.getSecond(); 8913 bool IsFirstComponentList = true; 8914 generateInfoForComponentList( 8915 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 8916 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 8917 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 8918 } 8919 // Go through other elements without overlapped elements. 8920 bool IsFirstComponentList = OverlappedData.empty(); 8921 for (const MapData &L : DeclComponentLists) { 8922 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8923 OpenMPMapClauseKind MapType; 8924 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8925 bool IsImplicit; 8926 const ValueDecl *Mapper; 8927 const Expr *VarRef; 8928 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8929 L; 8930 auto It = OverlappedData.find(&L); 8931 if (It == OverlappedData.end()) 8932 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 8933 Components, CombinedInfo, PartialStruct, 8934 IsFirstComponentList, IsImplicit, Mapper, 8935 /*ForDeviceAddr=*/false, VD, VarRef); 8936 IsFirstComponentList = false; 8937 } 8938 } 8939 8940 /// Generate the default map information for a given capture \a CI, 8941 /// record field declaration \a RI and captured value \a CV. 8942 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8943 const FieldDecl &RI, llvm::Value *CV, 8944 MapCombinedInfoTy &CombinedInfo) const { 8945 bool IsImplicit = true; 8946 // Do the default mapping. 
8947 if (CI.capturesThis()) { 8948 CombinedInfo.Exprs.push_back(nullptr); 8949 CombinedInfo.BasePointers.push_back(CV); 8950 CombinedInfo.Pointers.push_back(CV); 8951 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8952 CombinedInfo.Sizes.push_back( 8953 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8954 CGF.Int64Ty, /*isSigned=*/true)); 8955 // Default map type. 8956 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8957 } else if (CI.capturesVariableByCopy()) { 8958 const VarDecl *VD = CI.getCapturedVar(); 8959 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8960 CombinedInfo.BasePointers.push_back(CV); 8961 CombinedInfo.Pointers.push_back(CV); 8962 if (!RI.getType()->isAnyPointerType()) { 8963 // We have to signal to the runtime captures passed by value that are 8964 // not pointers. 8965 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 8966 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8967 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8968 } else { 8969 // Pointers are implicitly mapped with a zero size and no flags 8970 // (other than first map that is added for all implicit maps). 8971 CombinedInfo.Types.push_back(OMP_MAP_NONE); 8972 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8973 } 8974 auto I = FirstPrivateDecls.find(VD); 8975 if (I != FirstPrivateDecls.end()) 8976 IsImplicit = I->getSecond(); 8977 } else { 8978 assert(CI.capturesVariable() && "Expected captured reference."); 8979 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8980 QualType ElementType = PtrTy->getPointeeType(); 8981 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8982 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8983 // The default map type for a scalar/complex type is 'to' because by 8984 // default the value doesn't have to be retrieved. For an aggregate 8985 // type, the default is 'tofrom'. 
8986 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8987 const VarDecl *VD = CI.getCapturedVar(); 8988 auto I = FirstPrivateDecls.find(VD); 8989 if (I != FirstPrivateDecls.end() && 8990 VD->getType().isConstant(CGF.getContext())) { 8991 llvm::Constant *Addr = 8992 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8993 // Copy the value of the original variable to the new global copy. 8994 CGF.Builder.CreateMemCpy( 8995 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8996 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8997 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 8998 // Use new global variable as the base pointers. 8999 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9000 CombinedInfo.BasePointers.push_back(Addr); 9001 CombinedInfo.Pointers.push_back(Addr); 9002 } else { 9003 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9004 CombinedInfo.BasePointers.push_back(CV); 9005 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9006 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9007 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9008 AlignmentSource::Decl)); 9009 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 9010 } else { 9011 CombinedInfo.Pointers.push_back(CV); 9012 } 9013 } 9014 if (I != FirstPrivateDecls.end()) 9015 IsImplicit = I->getSecond(); 9016 } 9017 // Every default map produces a single argument which is a target parameter. 9018 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 9019 9020 // Add flag stating this is an implicit map. 9021 if (IsImplicit) 9022 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 9023 9024 // No user-defined mapper for default mapping. 
9025 CombinedInfo.Mappers.push_back(nullptr); 9026 } 9027 }; 9028 } // anonymous namespace 9029 9030 static void emitNonContiguousDescriptor( 9031 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9032 CGOpenMPRuntime::TargetDataInfo &Info) { 9033 CodeGenModule &CGM = CGF.CGM; 9034 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 9035 &NonContigInfo = CombinedInfo.NonContigInfo; 9036 9037 // Build an array of struct descriptor_dim and then assign it to 9038 // offload_args. 9039 // 9040 // struct descriptor_dim { 9041 // uint64_t offset; 9042 // uint64_t count; 9043 // uint64_t stride 9044 // }; 9045 ASTContext &C = CGF.getContext(); 9046 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9047 RecordDecl *RD; 9048 RD = C.buildImplicitRecord("descriptor_dim"); 9049 RD->startDefinition(); 9050 addFieldToRecordDecl(C, RD, Int64Ty); 9051 addFieldToRecordDecl(C, RD, Int64Ty); 9052 addFieldToRecordDecl(C, RD, Int64Ty); 9053 RD->completeDefinition(); 9054 QualType DimTy = C.getRecordType(RD); 9055 9056 enum { OffsetFD = 0, CountFD, StrideFD }; 9057 // We need two index variable here since the size of "Dims" is the same as the 9058 // size of Components, however, the size of offset, count, and stride is equal 9059 // to the size of base declaration that is non-contiguous. 9060 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9061 // Skip emitting ir if dimension size is 1 since it cannot be 9062 // non-contiguous. 
9063 if (NonContigInfo.Dims[I] == 1) 9064 continue; 9065 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9066 QualType ArrayTy = 9067 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9068 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9069 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9070 unsigned RevIdx = EE - II - 1; 9071 LValue DimsLVal = CGF.MakeAddrLValue( 9072 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9073 // Offset 9074 LValue OffsetLVal = CGF.EmitLValueForField( 9075 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9076 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9077 // Count 9078 LValue CountLVal = CGF.EmitLValueForField( 9079 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9080 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9081 // Stride 9082 LValue StrideLVal = CGF.EmitLValueForField( 9083 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9084 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9085 } 9086 // args[I] = &dims 9087 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9088 DimsAddr, CGM.Int8PtrTy); 9089 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9090 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9091 Info.PointersArray, 0, I); 9092 Address PAddr(P, CGF.getPointerAlign()); 9093 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9094 ++L; 9095 } 9096 } 9097 9098 /// Emit a string constant containing the names of the values mapped to the 9099 /// offloading runtime library. 
9100 llvm::Constant * 9101 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9102 MappableExprsHandler::MappingExprInfo &MapExprs) { 9103 llvm::Constant *SrcLocStr; 9104 if (!MapExprs.getMapDecl()) { 9105 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9106 } else { 9107 std::string ExprName = ""; 9108 if (MapExprs.getMapExpr()) { 9109 PrintingPolicy P(CGF.getContext().getLangOpts()); 9110 llvm::raw_string_ostream OS(ExprName); 9111 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9112 OS.flush(); 9113 } else { 9114 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9115 } 9116 9117 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9118 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9119 const char *FileName = PLoc.getFilename(); 9120 unsigned Line = PLoc.getLine(); 9121 unsigned Column = PLoc.getColumn(); 9122 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9123 Line, Column); 9124 } 9125 9126 return SrcLocStr; 9127 } 9128 9129 /// Emit the arrays used to pass the captures and map information to the 9130 /// offloading runtime library. If there is no map or capture information, 9131 /// return nullptr by reference. 9132 static void emitOffloadingArrays( 9133 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9134 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9135 bool IsNonContiguous = false) { 9136 CodeGenModule &CGM = CGF.CGM; 9137 ASTContext &Ctx = CGF.getContext(); 9138 9139 // Reset the array information. 9140 Info.clearArrayInfo(); 9141 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9142 9143 if (Info.NumberOfPtrs) { 9144 // Detect if we have any capture size requiring runtime evaluation of the 9145 // size so that a constant array could be eventually used. 
9146 bool hasRuntimeEvaluationCaptureSize = false; 9147 for (llvm::Value *S : CombinedInfo.Sizes) 9148 if (!isa<llvm::Constant>(S)) { 9149 hasRuntimeEvaluationCaptureSize = true; 9150 break; 9151 } 9152 9153 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9154 QualType PointerArrayType = Ctx.getConstantArrayType( 9155 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9156 /*IndexTypeQuals=*/0); 9157 9158 Info.BasePointersArray = 9159 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9160 Info.PointersArray = 9161 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9162 Address MappersArray = 9163 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9164 Info.MappersArray = MappersArray.getPointer(); 9165 9166 // If we don't have any VLA types or other types that require runtime 9167 // evaluation, we can use a constant array for the map sizes, otherwise we 9168 // need to fill up the arrays as we do for the pointers. 9169 QualType Int64Ty = 9170 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9171 if (hasRuntimeEvaluationCaptureSize) { 9172 QualType SizeArrayType = Ctx.getConstantArrayType( 9173 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9174 /*IndexTypeQuals=*/0); 9175 Info.SizesArray = 9176 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9177 } else { 9178 // We expect all the sizes to be constant, so we collect them to create 9179 // a constant array. 
9180 SmallVector<llvm::Constant *, 16> ConstSizes; 9181 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9182 if (IsNonContiguous && 9183 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9184 ConstSizes.push_back(llvm::ConstantInt::get( 9185 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9186 } else { 9187 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9188 } 9189 } 9190 9191 auto *SizesArrayInit = llvm::ConstantArray::get( 9192 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9193 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9194 auto *SizesArrayGbl = new llvm::GlobalVariable( 9195 CGM.getModule(), SizesArrayInit->getType(), 9196 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9197 SizesArrayInit, Name); 9198 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9199 Info.SizesArray = SizesArrayGbl; 9200 } 9201 9202 // The map types are always constant so we don't need to generate code to 9203 // fill arrays. Instead, we create an array constant. 9204 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9205 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9206 llvm::Constant *MapTypesArrayInit = 9207 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9208 std::string MaptypesName = 9209 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9210 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9211 CGM.getModule(), MapTypesArrayInit->getType(), 9212 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9213 MapTypesArrayInit, MaptypesName); 9214 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9215 Info.MapTypesArray = MapTypesArrayGbl; 9216 9217 // The information types are only built if there is debug information 9218 // requested. 
9219 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9220 Info.MapNamesArray = llvm::Constant::getNullValue( 9221 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9222 } else { 9223 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9224 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9225 }; 9226 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9227 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9228 9229 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 9230 llvm::ArrayType::get( 9231 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), 9232 CombinedInfo.Exprs.size()), 9233 InfoMap); 9234 auto *MapNamesArrayGbl = new llvm::GlobalVariable( 9235 CGM.getModule(), MapNamesArrayInit->getType(), 9236 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9237 MapNamesArrayInit, 9238 CGM.getOpenMPRuntime().getName({"offload_mapnames"})); 9239 Info.MapNamesArray = MapNamesArrayGbl; 9240 } 9241 9242 // If there's a present map type modifier, it must not be applied to the end 9243 // of a region, so generate a separate map type array in that case. 
9244 if (Info.separateBeginEndCalls()) { 9245 bool EndMapTypesDiffer = false; 9246 for (uint64_t &Type : Mapping) { 9247 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9248 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9249 EndMapTypesDiffer = true; 9250 } 9251 } 9252 if (EndMapTypesDiffer) { 9253 MapTypesArrayInit = 9254 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9255 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9256 MapTypesArrayGbl = new llvm::GlobalVariable( 9257 CGM.getModule(), MapTypesArrayInit->getType(), 9258 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9259 MapTypesArrayInit, MaptypesName); 9260 MapTypesArrayGbl->setUnnamedAddr( 9261 llvm::GlobalValue::UnnamedAddr::Global); 9262 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9263 } 9264 } 9265 9266 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9267 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9268 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9269 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9270 Info.BasePointersArray, 0, I); 9271 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9272 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9273 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9274 CGF.Builder.CreateStore(BPVal, BPAddr); 9275 9276 if (Info.requiresDevicePointerInfo()) 9277 if (const ValueDecl *DevVD = 9278 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9279 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9280 9281 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9282 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9283 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9284 Info.PointersArray, 0, I); 9285 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9286 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9287 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9288 CGF.Builder.CreateStore(PVal, PAddr); 9289 9290 if (hasRuntimeEvaluationCaptureSize) { 
9291 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9292 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9293 Info.SizesArray, 9294 /*Idx0=*/0, 9295 /*Idx1=*/I); 9296 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9297 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9298 CGM.Int64Ty, 9299 /*isSigned=*/true), 9300 SAddr); 9301 } 9302 9303 // Fill up the mapper array. 9304 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9305 if (CombinedInfo.Mappers[I]) { 9306 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9307 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9308 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9309 Info.HasMapper = true; 9310 } 9311 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9312 CGF.Builder.CreateStore(MFunc, MAddr); 9313 } 9314 } 9315 9316 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9317 Info.NumberOfPtrs == 0) 9318 return; 9319 9320 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9321 } 9322 9323 namespace { 9324 /// Additional arguments for emitOffloadingArraysArgument function. 9325 struct ArgumentsOptions { 9326 bool ForEndCall = false; 9327 ArgumentsOptions() = default; 9328 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9329 }; 9330 } // namespace 9331 9332 /// Emit the arguments to be passed to the runtime library based on the 9333 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9334 /// ForEndCall, emit map types to be passed for the end of the region instead of 9335 /// the beginning. 
9336 static void emitOffloadingArraysArgument( 9337 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9338 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9339 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9340 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9341 const ArgumentsOptions &Options = ArgumentsOptions()) { 9342 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9343 "expected region end call to runtime only when end call is separate"); 9344 CodeGenModule &CGM = CGF.CGM; 9345 if (Info.NumberOfPtrs) { 9346 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9347 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9348 Info.BasePointersArray, 9349 /*Idx0=*/0, /*Idx1=*/0); 9350 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9351 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9352 Info.PointersArray, 9353 /*Idx0=*/0, 9354 /*Idx1=*/0); 9355 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9356 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9357 /*Idx0=*/0, /*Idx1=*/0); 9358 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9359 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9360 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9361 : Info.MapTypesArray, 9362 /*Idx0=*/0, 9363 /*Idx1=*/0); 9364 9365 // Only emit the mapper information arrays if debug information is 9366 // requested. 
9367 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9368 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9369 else 9370 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9371 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9372 Info.MapNamesArray, 9373 /*Idx0=*/0, 9374 /*Idx1=*/0); 9375 // If there is no user-defined mapper, set the mapper array to nullptr to 9376 // avoid an unnecessary data privatization 9377 if (!Info.HasMapper) 9378 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9379 else 9380 MappersArrayArg = 9381 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9382 } else { 9383 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9384 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9385 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9386 MapTypesArrayArg = 9387 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9388 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9389 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9390 } 9391 } 9392 9393 /// Check for inner distribute directive. 
9394 static const OMPExecutableDirective * 9395 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9396 const auto *CS = D.getInnermostCapturedStmt(); 9397 const auto *Body = 9398 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9399 const Stmt *ChildStmt = 9400 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9401 9402 if (const auto *NestedDir = 9403 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9404 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9405 switch (D.getDirectiveKind()) { 9406 case OMPD_target: 9407 if (isOpenMPDistributeDirective(DKind)) 9408 return NestedDir; 9409 if (DKind == OMPD_teams) { 9410 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9411 /*IgnoreCaptured=*/true); 9412 if (!Body) 9413 return nullptr; 9414 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9415 if (const auto *NND = 9416 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9417 DKind = NND->getDirectiveKind(); 9418 if (isOpenMPDistributeDirective(DKind)) 9419 return NND; 9420 } 9421 } 9422 return nullptr; 9423 case OMPD_target_teams: 9424 if (isOpenMPDistributeDirective(DKind)) 9425 return NestedDir; 9426 return nullptr; 9427 case OMPD_target_parallel: 9428 case OMPD_target_simd: 9429 case OMPD_target_parallel_for: 9430 case OMPD_target_parallel_for_simd: 9431 return nullptr; 9432 case OMPD_target_teams_distribute: 9433 case OMPD_target_teams_distribute_simd: 9434 case OMPD_target_teams_distribute_parallel_for: 9435 case OMPD_target_teams_distribute_parallel_for_simd: 9436 case OMPD_parallel: 9437 case OMPD_for: 9438 case OMPD_parallel_for: 9439 case OMPD_parallel_master: 9440 case OMPD_parallel_sections: 9441 case OMPD_for_simd: 9442 case OMPD_parallel_for_simd: 9443 case OMPD_cancel: 9444 case OMPD_cancellation_point: 9445 case OMPD_ordered: 9446 case OMPD_threadprivate: 9447 case OMPD_allocate: 9448 case OMPD_task: 9449 case OMPD_simd: 9450 case OMPD_sections: 9451 
case OMPD_section: 9452 case OMPD_single: 9453 case OMPD_master: 9454 case OMPD_critical: 9455 case OMPD_taskyield: 9456 case OMPD_barrier: 9457 case OMPD_taskwait: 9458 case OMPD_taskgroup: 9459 case OMPD_atomic: 9460 case OMPD_flush: 9461 case OMPD_depobj: 9462 case OMPD_scan: 9463 case OMPD_teams: 9464 case OMPD_target_data: 9465 case OMPD_target_exit_data: 9466 case OMPD_target_enter_data: 9467 case OMPD_distribute: 9468 case OMPD_distribute_simd: 9469 case OMPD_distribute_parallel_for: 9470 case OMPD_distribute_parallel_for_simd: 9471 case OMPD_teams_distribute: 9472 case OMPD_teams_distribute_simd: 9473 case OMPD_teams_distribute_parallel_for: 9474 case OMPD_teams_distribute_parallel_for_simd: 9475 case OMPD_target_update: 9476 case OMPD_declare_simd: 9477 case OMPD_declare_variant: 9478 case OMPD_begin_declare_variant: 9479 case OMPD_end_declare_variant: 9480 case OMPD_declare_target: 9481 case OMPD_end_declare_target: 9482 case OMPD_declare_reduction: 9483 case OMPD_declare_mapper: 9484 case OMPD_taskloop: 9485 case OMPD_taskloop_simd: 9486 case OMPD_master_taskloop: 9487 case OMPD_master_taskloop_simd: 9488 case OMPD_parallel_master_taskloop: 9489 case OMPD_parallel_master_taskloop_simd: 9490 case OMPD_requires: 9491 case OMPD_unknown: 9492 default: 9493 llvm_unreachable("Unexpected directive."); 9494 } 9495 } 9496 9497 return nullptr; 9498 } 9499 9500 /// Emit the user-defined mapper function. The code generation follows the 9501 /// pattern in the example below. 9502 /// \code 9503 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9504 /// void *base, void *begin, 9505 /// int64_t size, int64_t type) { 9506 /// // Allocate space for an array section first. 9507 /// if (size > 1 && !maptype.IsDelete) 9508 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9509 /// size*sizeof(Ty), clearToFrom(type)); 9510 /// // Map members. 
9511 /// for (unsigned i = 0; i < size; i++) { 9512 /// // For each component specified by this mapper: 9513 /// for (auto c : all_components) { 9514 /// if (c.hasMapper()) 9515 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9516 /// c.arg_type); 9517 /// else 9518 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9519 /// c.arg_begin, c.arg_size, c.arg_type); 9520 /// } 9521 /// } 9522 /// // Delete the array section. 9523 /// if (size > 1 && maptype.IsDelete) 9524 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9525 /// size*sizeof(Ty), clearToFrom(type)); 9526 /// } 9527 /// \endcode 9528 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9529 CodeGenFunction *CGF) { 9530 if (UDMMap.count(D) > 0) 9531 return; 9532 ASTContext &C = CGM.getContext(); 9533 QualType Ty = D->getType(); 9534 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9535 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9536 auto *MapperVarDecl = 9537 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9538 SourceLocation Loc = D->getLocation(); 9539 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9540 9541 // Prepare mapper function arguments and attributes. 
9542 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9543 C.VoidPtrTy, ImplicitParamDecl::Other); 9544 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9545 ImplicitParamDecl::Other); 9546 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9547 C.VoidPtrTy, ImplicitParamDecl::Other); 9548 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9549 ImplicitParamDecl::Other); 9550 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9551 ImplicitParamDecl::Other); 9552 FunctionArgList Args; 9553 Args.push_back(&HandleArg); 9554 Args.push_back(&BaseArg); 9555 Args.push_back(&BeginArg); 9556 Args.push_back(&SizeArg); 9557 Args.push_back(&TypeArg); 9558 const CGFunctionInfo &FnInfo = 9559 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9560 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9561 SmallString<64> TyStr; 9562 llvm::raw_svector_ostream Out(TyStr); 9563 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9564 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9565 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9566 Name, &CGM.getModule()); 9567 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9568 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9569 // Start the mapper function code generation. 9570 CodeGenFunction MapperCGF(CGM); 9571 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9572 // Compute the starting and end addreses of array elements. 9573 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9574 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9575 C.getPointerType(Int64Ty), Loc); 9576 // Convert the size in bytes into the number of array elements. 
9577 Size = MapperCGF.Builder.CreateExactUDiv( 9578 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9579 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9580 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9581 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9582 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9583 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9584 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9585 C.getPointerType(Int64Ty), Loc); 9586 // Prepare common arguments for array initiation and deletion. 9587 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9588 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9589 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9590 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9591 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9592 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9593 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9594 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9595 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9596 9597 // Emit array initiation if this is an array section and \p MapType indicates 9598 // that memory allocation is required. 9599 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9600 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9601 ElementSize, HeadBB, /*IsInit=*/true); 9602 9603 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9604 9605 // Emit the loop header block. 9606 MapperCGF.EmitBlock(HeadBB); 9607 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9608 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9609 // Evaluate whether the initial condition is satisfied. 
9610 llvm::Value *IsEmpty = 9611 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9612 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9613 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9614 9615 // Emit the loop body block. 9616 MapperCGF.EmitBlock(BodyBB); 9617 llvm::BasicBlock *LastBB = BodyBB; 9618 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9619 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9620 PtrPHI->addIncoming(PtrBegin, EntryBB); 9621 Address PtrCurrent = 9622 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9623 .getAlignment() 9624 .alignmentOfArrayElement(ElementSize)); 9625 // Privatize the declared variable of mapper to be the current array element. 9626 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9627 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9628 return MapperCGF 9629 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9630 .getAddress(MapperCGF); 9631 }); 9632 (void)Scope.Privatize(); 9633 9634 // Get map clause information. Fill up the arrays with all mapped variables. 9635 MappableExprsHandler::MapCombinedInfoTy Info; 9636 MappableExprsHandler MEHandler(*D, MapperCGF); 9637 MEHandler.generateAllInfoForMapper(Info); 9638 9639 // Call the runtime API __tgt_mapper_num_components to get the number of 9640 // pre-existing components. 9641 llvm::Value *OffloadingArgs[] = {Handle}; 9642 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9643 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9644 OMPRTL___tgt_mapper_num_components), 9645 OffloadingArgs); 9646 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9647 PreviousSize, 9648 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9649 9650 // Fill up the runtime mapper handle for all components. 
9651 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9652 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9653 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9654 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9655 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9656 llvm::Value *CurSizeArg = Info.Sizes[I]; 9657 9658 // Extract the MEMBER_OF field from the map type. 9659 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9660 MapperCGF.EmitBlock(MemberBB); 9661 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9662 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9663 OriMapType, 9664 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9665 llvm::BasicBlock *MemberCombineBB = 9666 MapperCGF.createBasicBlock("omp.member.combine"); 9667 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9668 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9669 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9670 // Add the number of pre-existing components to the MEMBER_OF field if it 9671 // is valid. 9672 MapperCGF.EmitBlock(MemberCombineBB); 9673 llvm::Value *CombinedMember = 9674 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9675 // Do nothing if it is not a member of previous components. 9676 MapperCGF.EmitBlock(TypeBB); 9677 llvm::PHINode *MemberMapType = 9678 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9679 MemberMapType->addIncoming(OriMapType, MemberBB); 9680 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9681 9682 // Combine the map type inherited from user-defined mapper with that 9683 // specified in the program. 
According to the OMP_MAP_TO and OMP_MAP_FROM 9684 // bits of the \a MapType, which is the input argument of the mapper 9685 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9686 // bits of MemberMapType. 9687 // [OpenMP 5.0], 1.2.6. map-type decay. 9688 // | alloc | to | from | tofrom | release | delete 9689 // ---------------------------------------------------------- 9690 // alloc | alloc | alloc | alloc | alloc | release | delete 9691 // to | alloc | to | alloc | to | release | delete 9692 // from | alloc | alloc | from | from | release | delete 9693 // tofrom | alloc | to | from | tofrom | release | delete 9694 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9695 MapType, 9696 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9697 MappableExprsHandler::OMP_MAP_FROM)); 9698 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9699 llvm::BasicBlock *AllocElseBB = 9700 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9701 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9702 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9703 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9704 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9705 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9706 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9707 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
9708 MapperCGF.EmitBlock(AllocBB); 9709 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9710 MemberMapType, 9711 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9712 MappableExprsHandler::OMP_MAP_FROM))); 9713 MapperCGF.Builder.CreateBr(EndBB); 9714 MapperCGF.EmitBlock(AllocElseBB); 9715 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9716 LeftToFrom, 9717 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9718 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9719 // In case of to, clear OMP_MAP_FROM. 9720 MapperCGF.EmitBlock(ToBB); 9721 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9722 MemberMapType, 9723 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9724 MapperCGF.Builder.CreateBr(EndBB); 9725 MapperCGF.EmitBlock(ToElseBB); 9726 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9727 LeftToFrom, 9728 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9729 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9730 // In case of from, clear OMP_MAP_TO. 9731 MapperCGF.EmitBlock(FromBB); 9732 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9733 MemberMapType, 9734 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9735 // In case of tofrom, do nothing. 9736 MapperCGF.EmitBlock(EndBB); 9737 LastBB = EndBB; 9738 llvm::PHINode *CurMapType = 9739 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9740 CurMapType->addIncoming(AllocMapType, AllocBB); 9741 CurMapType->addIncoming(ToMapType, ToBB); 9742 CurMapType->addIncoming(FromMapType, FromBB); 9743 CurMapType->addIncoming(MemberMapType, ToElseBB); 9744 9745 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9746 CurSizeArg, CurMapType}; 9747 if (Info.Mappers[I]) { 9748 // Call the corresponding mapper function. 
9749 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9750 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9751 assert(MapperFunc && "Expect a valid mapper function is available."); 9752 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9753 } else { 9754 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9755 // data structure. 9756 MapperCGF.EmitRuntimeCall( 9757 OMPBuilder.getOrCreateRuntimeFunction( 9758 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9759 OffloadingArgs); 9760 } 9761 } 9762 9763 // Update the pointer to point to the next element that needs to be mapped, 9764 // and check whether we have mapped all elements. 9765 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9766 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9767 PtrPHI->addIncoming(PtrNext, LastBB); 9768 llvm::Value *IsDone = 9769 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9770 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9771 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9772 9773 MapperCGF.EmitBlock(ExitBB); 9774 // Emit array deletion if this is an array section and \p MapType indicates 9775 // that deletion is required. 9776 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9777 ElementSize, DoneBB, /*IsInit=*/false); 9778 9779 // Emit the function exit block. 9780 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9781 MapperCGF.FinishFunction(); 9782 UDMMap.try_emplace(D, Fn); 9783 if (CGF) { 9784 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9785 Decls.second.push_back(D); 9786 } 9787 } 9788 9789 /// Emit the array initialization or deletion portion for user-defined mapper 9790 /// code generation. First, it evaluates whether an array section is mapped and 9791 /// whether the \a MapType instructs to delete this section. 
/// If \a IsInit is true, and \a MapType indicates to not delete this array,
/// array initialization code is generated. If \a IsInit is false, and
/// \a MapType indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF   Code generator of the mapper function being emitted.
/// \param Handle      Runtime mapper handle passed on to the runtime call.
/// \param Base        Base address passed on to the runtime call.
/// \param Begin       Begin address passed on to the runtime call.
/// \param Size        Number of array elements (not bytes).
/// \param MapType     Map type received by the mapper function.
/// \param ElementSize Size of a single array element.
/// \param ExitBB      Block branched to when no init/delete code is required.
/// \param IsInit      Selects initialization (true) or deletion (false).
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  // NOTE(review): this test admits Size >= 1, whereas the pseudo-code in the
  // documentation of emitUserDefinedMapper guards the array-section handling
  // with `size > 1` - confirm which of the two is intended.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  // Initialization proceeds only when the delete bit is clear; deletion
  // proceeds only when it is set.
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded for \a D in UDMMap. If there is no
/// entry yet, the mapper is emitted first via emitUserDefinedMapper and the
/// lookup is repeated.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount carrying the number of
/// iterations that \a SizeEmitter produces for the loop directive associated
/// with \a D. Nothing is emitted if no suitable loop directive is found or if
/// \a SizeEmitter returns null.
///
/// \param CGF         Function in which the call is emitted.
/// \param D           The executable directive being lowered.
/// \param DeviceID    Device id value passed to the runtime call.
/// \param SizeEmitter Callback computing the iteration count of a loop
///                    directive; may return null.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // D itself is used only when it is both a distribute and a teams directive.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches a target region: build the offloading
/// argument arrays, call the runtime offloading entry point and fall back to
/// calling the host version \a OutlinedFn when offloading fails or is not
/// requested.
///
/// \param CGF          Function in which the code is emitted; nothing is
///                     emitted if it has no insertion point.
/// \param D            The target directive being lowered.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region passed to the runtime.
///                     If null, only the host path is emitted.
/// \param IfCond       Condition of the 'if' clause, or null if absent.
/// \param Device       Device clause expression together with its modifier.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' and 'nowait' clauses route the region through
  // EmitOMPTargetTaskBasedDirective below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collects the map information for every capture, materializes the
  // offloading arrays and then runs ThenGen, either inside an outer task or
  // inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // The captures, the captured record fields and the captured values are
    // walked in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                    nullptr, /*NoTargetParam=*/false);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the arrays through the variables that ThenGen captured by
    // reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: runs ElseGen, either inside an outer task or inlined.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \a S looking for target execution directives and emit the
/// device function for each one that has a registered target region entry.
///
/// \param S          Statement to scan; a null statement is ignored.
/// \param ParentName Name of the enclosing function, used to look up the
///                   target region entry and passed to the device emitters.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device emitter that matches the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the kinds below is expected here: this branch is only entered
    // when isOpenMPTargetExecutionDirective() holds for the directive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Any other executable directive: continue the scan in its raw statement,
  // provided it has an associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Process the function declaration \a GD for target code generation. When
/// compiling for the device, the function body is also scanned for target
/// regions.
///
/// \return true in the cases the in-code comments describe as "do not emit",
/// false otherwise.
/// NOTE(review): presumably the caller skips regular emission of \a GD when
/// true is returned - confirm against the call site.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target and has
  // not already been recorded in AlreadyEmittedTargetDecls.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
10390 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10391 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10392 cast<VarDecl>(GD.getDecl())); 10393 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10394 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10395 HasRequiresUnifiedSharedMemory)) { 10396 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10397 return true; 10398 } 10399 return false; 10400 } 10401 10402 llvm::Constant * 10403 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10404 const VarDecl *VD) { 10405 assert(VD->getType().isConstant(CGM.getContext()) && 10406 "Expected constant variable."); 10407 StringRef VarName; 10408 llvm::Constant *Addr; 10409 llvm::GlobalValue::LinkageTypes Linkage; 10410 QualType Ty = VD->getType(); 10411 SmallString<128> Buffer; 10412 { 10413 unsigned DeviceID; 10414 unsigned FileID; 10415 unsigned Line; 10416 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10417 FileID, Line); 10418 llvm::raw_svector_ostream OS(Buffer); 10419 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10420 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10421 VarName = OS.str(); 10422 } 10423 Linkage = llvm::GlobalValue::InternalLinkage; 10424 Addr = 10425 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10426 getDefaultFirstprivateAddressSpace()); 10427 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10428 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10429 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10430 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10431 VarName, Addr, VarSize, 10432 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10433 return Addr; 10434 } 10435 10436 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10437 llvm::Constant *Addr) { 10438 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10439 
!CGM.getLangOpts().OpenMPIsDevice) 10440 return; 10441 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10442 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10443 if (!Res) { 10444 if (CGM.getLangOpts().OpenMPIsDevice) { 10445 // Register non-target variables being emitted in device code (debug info 10446 // may cause this). 10447 StringRef VarName = CGM.getMangledName(VD); 10448 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10449 } 10450 return; 10451 } 10452 // Register declare target variables. 10453 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10454 StringRef VarName; 10455 CharUnits VarSize; 10456 llvm::GlobalValue::LinkageTypes Linkage; 10457 10458 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10459 !HasRequiresUnifiedSharedMemory) { 10460 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10461 VarName = CGM.getMangledName(VD); 10462 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10463 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10464 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10465 } else { 10466 VarSize = CharUnits::Zero(); 10467 } 10468 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10469 // Temp solution to prevent optimizations of the internal variables. 
10470 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10471 std::string RefName = getName({VarName, "ref"}); 10472 if (!CGM.GetGlobalValue(RefName)) { 10473 llvm::Constant *AddrRef = 10474 getOrCreateInternalVariable(Addr->getType(), RefName); 10475 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10476 GVAddrRef->setConstant(/*Val=*/true); 10477 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10478 GVAddrRef->setInitializer(Addr); 10479 CGM.addCompilerUsedGlobal(GVAddrRef); 10480 } 10481 } 10482 } else { 10483 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10484 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10485 HasRequiresUnifiedSharedMemory)) && 10486 "Declare target attribute must link or to with unified memory."); 10487 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10488 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10489 else 10490 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10491 10492 if (CGM.getLangOpts().OpenMPIsDevice) { 10493 VarName = Addr->getName(); 10494 Addr = nullptr; 10495 } else { 10496 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10497 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10498 } 10499 VarSize = CGM.getPointerSize(); 10500 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10501 } 10502 10503 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10504 VarName, Addr, VarSize, Flags, Linkage); 10505 } 10506 10507 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10508 if (isa<FunctionDecl>(GD.getDecl()) || 10509 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10510 return emitTargetFunctions(GD); 10511 10512 return emitTargetGlobalVariable(GD); 10513 } 10514 10515 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10516 for (const VarDecl *VD : DeferredGlobalVariables) { 10517 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10518 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10519 if (!Res) 
10520 continue; 10521 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10522 !HasRequiresUnifiedSharedMemory) { 10523 CGM.EmitGlobal(VD); 10524 } else { 10525 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10526 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10527 HasRequiresUnifiedSharedMemory)) && 10528 "Expected link clause or to clause with unified memory."); 10529 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10530 } 10531 } 10532 } 10533 10534 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10535 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10536 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10537 " Expected target-based directive."); 10538 } 10539 10540 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10541 for (const OMPClause *Clause : D->clauselists()) { 10542 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10543 HasRequiresUnifiedSharedMemory = true; 10544 } else if (const auto *AC = 10545 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10546 switch (AC->getAtomicDefaultMemOrderKind()) { 10547 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10548 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10549 break; 10550 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10551 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10552 break; 10553 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10554 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10555 break; 10556 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10557 break; 10558 } 10559 } 10560 } 10561 } 10562 10563 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10564 return RequiresAtomicOrdering; 10565 } 10566 10567 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10568 LangAS &AS) { 10569 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10570 return false; 10571 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10572 
switch(A->getAllocatorType()) { 10573 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10574 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10575 // Not supported, fallback to the default mem space. 10576 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10577 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10578 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10579 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10580 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10581 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10582 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10583 AS = LangAS::Default; 10584 return true; 10585 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10586 llvm_unreachable("Expected predefined allocator for the variables with the " 10587 "static storage."); 10588 } 10589 return false; 10590 } 10591 10592 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10593 return HasRequiresUnifiedSharedMemory; 10594 } 10595 10596 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10597 CodeGenModule &CGM) 10598 : CGM(CGM) { 10599 if (CGM.getLangOpts().OpenMPIsDevice) { 10600 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10601 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10602 } 10603 } 10604 10605 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10606 if (CGM.getLangOpts().OpenMPIsDevice) 10607 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10608 } 10609 10610 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10611 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10612 return true; 10613 10614 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10615 // Do not to emit function if it is marked as declare target as it was already 10616 // emitted. 
10617 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10618 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10619 if (auto *F = dyn_cast_or_null<llvm::Function>( 10620 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10621 return !F->isDeclaration(); 10622 return false; 10623 } 10624 return true; 10625 } 10626 10627 return !AlreadyEmittedTargetDecls.insert(D).second; 10628 } 10629 10630 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10631 // If we don't have entries or if we are emitting code for the device, we 10632 // don't need to do anything. 10633 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10634 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10635 (OffloadEntriesInfoManager.empty() && 10636 !HasEmittedDeclareTargetRegion && 10637 !HasEmittedTargetRegion)) 10638 return nullptr; 10639 10640 // Create and register the function that handles the requires directives. 10641 ASTContext &C = CGM.getContext(); 10642 10643 llvm::Function *RequiresRegFn; 10644 { 10645 CodeGenFunction CGF(CGM); 10646 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10647 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10648 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10649 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10650 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10651 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10652 // TODO: check for other requires clauses. 10653 // The requires directive takes effect only when a target region is 10654 // present in the compilation unit. Otherwise it is ignored and not 10655 // passed to the runtime. This avoids the runtime from throwing an error 10656 // for mismatching requires clauses across compilation units that don't 10657 // contain at least 1 target region. 
10658 assert((HasEmittedTargetRegion || 10659 HasEmittedDeclareTargetRegion || 10660 !OffloadEntriesInfoManager.empty()) && 10661 "Target or declare target region expected."); 10662 if (HasRequiresUnifiedSharedMemory) 10663 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10664 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10665 CGM.getModule(), OMPRTL___tgt_register_requires), 10666 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10667 CGF.FinishFunction(); 10668 } 10669 return RequiresRegFn; 10670 } 10671 10672 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10673 const OMPExecutableDirective &D, 10674 SourceLocation Loc, 10675 llvm::Function *OutlinedFn, 10676 ArrayRef<llvm::Value *> CapturedVars) { 10677 if (!CGF.HaveInsertPoint()) 10678 return; 10679 10680 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10681 CodeGenFunction::RunCleanupsScope Scope(CGF); 10682 10683 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10684 llvm::Value *Args[] = { 10685 RTLoc, 10686 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10687 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10688 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10689 RealArgs.append(std::begin(Args), std::end(Args)); 10690 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10691 10692 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10693 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10694 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10695 } 10696 10697 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10698 const Expr *NumTeams, 10699 const Expr *ThreadLimit, 10700 SourceLocation Loc) { 10701 if (!CGF.HaveInsertPoint()) 10702 return; 10703 10704 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10705 10706 llvm::Value *NumTeamsVal = 10707 NumTeams 10708 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10709 CGF.CGM.Int32Ty, /* isSigned = */ true) 10710 : CGF.Builder.getInt32(0); 10711 10712 llvm::Value *ThreadLimitVal = 10713 ThreadLimit 10714 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10715 CGF.CGM.Int32Ty, /* isSigned = */ true) 10716 : CGF.Builder.getInt32(0); 10717 10718 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10719 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10720 ThreadLimitVal}; 10721 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10722 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10723 PushNumTeamsArgs); 10724 } 10725 10726 void CGOpenMPRuntime::emitTargetDataCalls( 10727 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10728 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10729 if (!CGF.HaveInsertPoint()) 10730 return; 10731 10732 // Action used to replace the default codegen action and turn privatization 10733 // off. 10734 PrePostActionTy NoPrivAction; 10735 10736 // Generate the code for the opening of the data environment. Capture all the 10737 // arguments of the runtime call by reference because they are used in the 10738 // closing of the region. 10739 auto &&BeginThenGen = [this, &D, Device, &Info, 10740 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10741 // Fill up the arrays with all the mapped variables. 10742 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10743 10744 // Get map clause information. 10745 MappableExprsHandler MEHandler(D, CGF); 10746 MEHandler.generateAllInfo(CombinedInfo); 10747 10748 // Fill up the arrays and create the arguments. 
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    // The array arguments are filled in by emitOffloadingArraysArgument().
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; otherwise pass OMP_DEVICEID_UNDEF.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Rebuild the array arguments from Info, this time for the end call.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; otherwise pass OMP_DEVICEID_UNDEF.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, guarded by the 'if' clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, guarded by the same 'if' clause.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone target data directive: 'target
/// enter data', 'target exit data' or 'target update'.
///
/// \param IfCond optional 'if' clause condition; when it evaluates to false
/// nothing is emitted for the directive.
/// \param Device optional 'device' clause expression.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // These are captured by reference by the lambdas below: TargetThenGen fills
  // them in and ThenGen reads them when building the runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device expression: pass OMP_DEVICEID_UNDEF.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. The '_nowait' variant is used when the directive has a
    // 'nowait' clause.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is not a standalone target data directive
    // and must not reach this point.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo,
  // MapTypesArray and MapNamesArray, and then runs ThenGen either inside a
  // target task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause requires wrapping the call in a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the call is emitted only in the 'then' branch; the
  // 'else' branch is intentionally empty.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Parameter kind; parameters are 'Vector' unless stated otherwise.
  ParamKindTy Kind = Vector;
  /// Printed after 's' for LinearWithVarStride and after 'l' for Linear
  /// parameters when mangling the x86 vector variant name.
  llvm::APSInt StrideOrArg;
  /// Alignment of the parameter; mangled as 'a<value>' when non-zero.
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size of the "characteristic data type" (CDT) of \p FD, as
/// reported by ASTContext::getTypeSize(), or 0 if \p FD has a null return
/// type.
///
/// \param ParamAttrs attributes of the parameters of \p FD; for C++ methods
/// the first entry describes the implicit object parameter.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
11089 QualType RetType = FD->getReturnType(); 11090 if (RetType.isNull()) 11091 return 0; 11092 ASTContext &C = FD->getASTContext(); 11093 QualType CDT; 11094 if (!RetType.isNull() && !RetType->isVoidType()) { 11095 CDT = RetType; 11096 } else { 11097 unsigned Offset = 0; 11098 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11099 if (ParamAttrs[Offset].Kind == Vector) 11100 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11101 ++Offset; 11102 } 11103 if (CDT.isNull()) { 11104 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11105 if (ParamAttrs[I + Offset].Kind == Vector) { 11106 CDT = FD->getParamDecl(I)->getType(); 11107 break; 11108 } 11109 } 11110 } 11111 } 11112 if (CDT.isNull()) 11113 CDT = C.IntTy; 11114 CDT = CDT->getCanonicalTypeUnqualified(); 11115 if (CDT->isRecordType() || CDT->isUnionType()) 11116 CDT = C.IntTy; 11117 return C.getTypeSize(CDT); 11118 } 11119 11120 static void 11121 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11122 const llvm::APSInt &VLENVal, 11123 ArrayRef<ParamAttrTy> ParamAttrs, 11124 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11125 struct ISADataTy { 11126 char ISA; 11127 unsigned VecRegSize; 11128 }; 11129 ISADataTy ISAData[] = { 11130 { 11131 'b', 128 11132 }, // SSE 11133 { 11134 'c', 256 11135 }, // AVX 11136 { 11137 'd', 256 11138 }, // AVX2 11139 { 11140 'e', 512 11141 }, // AVX512 11142 }; 11143 llvm::SmallVector<char, 2> Masked; 11144 switch (State) { 11145 case OMPDeclareSimdDeclAttr::BS_Undefined: 11146 Masked.push_back('N'); 11147 Masked.push_back('M'); 11148 break; 11149 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11150 Masked.push_back('N'); 11151 break; 11152 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11153 Masked.push_back('M'); 11154 break; 11155 } 11156 for (char Mask : Masked) { 11157 for (const ISADataTy &Data : ISAData) { 11158 SmallString<256> Buffer; 11159 llvm::raw_svector_ostream Out(Buffer); 11160 Out << "_ZGV" << Data.ISA << Mask; 11161 if (!VLENVal) 
{ 11162 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11163 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11164 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11165 } else { 11166 Out << VLENVal; 11167 } 11168 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11169 switch (ParamAttr.Kind){ 11170 case LinearWithVarStride: 11171 Out << 's' << ParamAttr.StrideOrArg; 11172 break; 11173 case Linear: 11174 Out << 'l'; 11175 if (ParamAttr.StrideOrArg != 1) 11176 Out << ParamAttr.StrideOrArg; 11177 break; 11178 case Uniform: 11179 Out << 'u'; 11180 break; 11181 case Vector: 11182 Out << 'v'; 11183 break; 11184 } 11185 if (!!ParamAttr.Alignment) 11186 Out << 'a' << ParamAttr.Alignment; 11187 } 11188 Out << '_' << Fn->getName(); 11189 Fn->addFnAttr(Out.str()); 11190 } 11191 } 11192 } 11193 11194 // This are the Functions that are needed to mangle the name of the 11195 // vector functions generated by the compiler, according to the rules 11196 // defined in the "Vector Function ABI specifications for AArch64", 11197 // available at 11198 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11199 11200 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11201 /// 11202 /// TODO: Need to implement the behavior for reference marked with a 11203 /// var or no linear modifiers (1.b in the section). For this, we 11204 /// need to extend ParamKindTy to support the linear modifiers. 
11205 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11206 QT = QT.getCanonicalType(); 11207 11208 if (QT->isVoidType()) 11209 return false; 11210 11211 if (Kind == ParamKindTy::Uniform) 11212 return false; 11213 11214 if (Kind == ParamKindTy::Linear) 11215 return false; 11216 11217 // TODO: Handle linear references with modifiers 11218 11219 if (Kind == ParamKindTy::LinearWithVarStride) 11220 return false; 11221 11222 return true; 11223 } 11224 11225 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11226 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11227 QT = QT.getCanonicalType(); 11228 unsigned Size = C.getTypeSize(QT); 11229 11230 // Only scalars and complex within 16 bytes wide set PVB to true. 11231 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11232 return false; 11233 11234 if (QT->isFloatingType()) 11235 return true; 11236 11237 if (QT->isIntegerType()) 11238 return true; 11239 11240 if (QT->isPointerType()) 11241 return true; 11242 11243 // TODO: Add support for complex types (section 3.1.2, item 2). 11244 11245 return false; 11246 } 11247 11248 /// Computes the lane size (LS) of a return type or of an input parameter, 11249 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11250 /// TODO: Add support for references, section 3.2.1, item 1. 11251 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11252 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11253 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11254 if (getAArch64PBV(PTy, C)) 11255 return C.getTypeSize(PTy); 11256 } 11257 if (getAArch64PBV(QT, C)) 11258 return C.getTypeSize(QT); 11259 11260 return C.getTypeSize(C.getUIntPtrType()); 11261 } 11262 11263 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11264 // signature of the scalar function, as defined in 3.2.2 of the 11265 // AAVFABI. 
11266 static std::tuple<unsigned, unsigned, bool> 11267 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11268 QualType RetType = FD->getReturnType().getCanonicalType(); 11269 11270 ASTContext &C = FD->getASTContext(); 11271 11272 bool OutputBecomesInput = false; 11273 11274 llvm::SmallVector<unsigned, 8> Sizes; 11275 if (!RetType->isVoidType()) { 11276 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11277 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11278 OutputBecomesInput = true; 11279 } 11280 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11281 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11282 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11283 } 11284 11285 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11286 // The LS of a function parameter / return value can only be a power 11287 // of 2, starting from 8 bits, up to 128. 11288 assert(std::all_of(Sizes.begin(), Sizes.end(), 11289 [](unsigned Size) { 11290 return Size == 8 || Size == 16 || Size == 32 || 11291 Size == 64 || Size == 128; 11292 }) && 11293 "Invalid size"); 11294 11295 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11296 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11297 OutputBecomesInput); 11298 } 11299 11300 /// Mangle the parameter part of the vector function name according to 11301 /// their OpenMP classification. The mangling function is defined in 11302 /// section 3.5 of the AAVFABI. 11303 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11304 SmallString<256> Buffer; 11305 llvm::raw_svector_ostream Out(Buffer); 11306 for (const auto &ParamAttr : ParamAttrs) { 11307 switch (ParamAttr.Kind) { 11308 case LinearWithVarStride: 11309 Out << "ls" << ParamAttr.StrideOrArg; 11310 break; 11311 case Linear: 11312 Out << 'l'; 11313 // Don't print the step value if it is not present or if it is 11314 // equal to 1. 
11315 if (ParamAttr.StrideOrArg != 1) 11316 Out << ParamAttr.StrideOrArg; 11317 break; 11318 case Uniform: 11319 Out << 'u'; 11320 break; 11321 case Vector: 11322 Out << 'v'; 11323 break; 11324 } 11325 11326 if (!!ParamAttr.Alignment) 11327 Out << 'a' << ParamAttr.Alignment; 11328 } 11329 11330 return std::string(Out.str()); 11331 } 11332 11333 // Function used to add the attribute. The parameter `VLEN` is 11334 // templated to allow the use of "x" when targeting scalable functions 11335 // for SVE. 11336 template <typename T> 11337 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11338 char ISA, StringRef ParSeq, 11339 StringRef MangledName, bool OutputBecomesInput, 11340 llvm::Function *Fn) { 11341 SmallString<256> Buffer; 11342 llvm::raw_svector_ostream Out(Buffer); 11343 Out << Prefix << ISA << LMask << VLEN; 11344 if (OutputBecomesInput) 11345 Out << "v"; 11346 Out << ParSeq << "_" << MangledName; 11347 Fn->addFnAttr(Out.str()); 11348 } 11349 11350 // Helper function to generate the Advanced SIMD names depending on 11351 // the value of the NDS when simdlen is not present. 
11352 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11353 StringRef Prefix, char ISA, 11354 StringRef ParSeq, StringRef MangledName, 11355 bool OutputBecomesInput, 11356 llvm::Function *Fn) { 11357 switch (NDS) { 11358 case 8: 11359 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11360 OutputBecomesInput, Fn); 11361 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11362 OutputBecomesInput, Fn); 11363 break; 11364 case 16: 11365 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11366 OutputBecomesInput, Fn); 11367 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11368 OutputBecomesInput, Fn); 11369 break; 11370 case 32: 11371 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11372 OutputBecomesInput, Fn); 11373 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11374 OutputBecomesInput, Fn); 11375 break; 11376 case 64: 11377 case 128: 11378 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11379 OutputBecomesInput, Fn); 11380 break; 11381 default: 11382 llvm_unreachable("Scalar type is too wide."); 11383 } 11384 } 11385 11386 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11387 static void emitAArch64DeclareSimdFunction( 11388 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11389 ArrayRef<ParamAttrTy> ParamAttrs, 11390 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11391 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11392 11393 // Get basic data for building the vector signature. 11394 const auto Data = getNDSWDS(FD, ParamAttrs); 11395 const unsigned NDS = std::get<0>(Data); 11396 const unsigned WDS = std::get<1>(Data); 11397 const bool OutputBecomesInput = std::get<2>(Data); 11398 11399 // Check the values provided via `simdlen` by the user. 11400 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11401 if (UserVLEN == 1) { 11402 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11403 DiagnosticsEngine::Warning, 11404 "The clause simdlen(1) has no effect when targeting aarch64."); 11405 CGM.getDiags().Report(SLoc, DiagID); 11406 return; 11407 } 11408 11409 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11410 // Advanced SIMD output. 11411 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11412 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11413 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11414 "power of 2 when targeting Advanced SIMD."); 11415 CGM.getDiags().Report(SLoc, DiagID); 11416 return; 11417 } 11418 11419 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11420 // limits. 11421 if (ISA == 's' && UserVLEN != 0) { 11422 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11423 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11424 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11425 "lanes in the architectural constraints " 11426 "for SVE (min is 128-bit, max is " 11427 "2048-bit, by steps of 128-bit)"); 11428 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11429 return; 11430 } 11431 } 11432 11433 // Sort out parameter sequence. 11434 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11435 StringRef Prefix = "_ZGV"; 11436 // Generate simdlen from user input (if any). 11437 if (UserVLEN) { 11438 if (ISA == 's') { 11439 // SVE generates only a masked function. 11440 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11441 OutputBecomesInput, Fn); 11442 } else { 11443 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11444 // Advanced SIMD generates one or two functions, depending on 11445 // the `[not]inbranch` clause. 
11446 switch (State) { 11447 case OMPDeclareSimdDeclAttr::BS_Undefined: 11448 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11449 OutputBecomesInput, Fn); 11450 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11451 OutputBecomesInput, Fn); 11452 break; 11453 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11454 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11455 OutputBecomesInput, Fn); 11456 break; 11457 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11458 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11459 OutputBecomesInput, Fn); 11460 break; 11461 } 11462 } 11463 } else { 11464 // If no user simdlen is provided, follow the AAVFABI rules for 11465 // generating the vector length. 11466 if (ISA == 's') { 11467 // SVE, section 3.4.1, item 1. 11468 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11469 OutputBecomesInput, Fn); 11470 } else { 11471 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11472 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11473 // two vector names depending on the use of the clause 11474 // `[not]inbranch`. 
11475 switch (State) { 11476 case OMPDeclareSimdDeclAttr::BS_Undefined: 11477 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11478 OutputBecomesInput, Fn); 11479 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11480 OutputBecomesInput, Fn); 11481 break; 11482 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11483 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11484 OutputBecomesInput, Fn); 11485 break; 11486 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11487 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11488 OutputBecomesInput, Fn); 11489 break; 11490 } 11491 } 11492 } 11493 } 11494 11495 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11496 llvm::Function *Fn) { 11497 ASTContext &C = CGM.getContext(); 11498 FD = FD->getMostRecentDecl(); 11499 // Map params to their positions in function decl. 11500 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11501 if (isa<CXXMethodDecl>(FD)) 11502 ParamPositions.try_emplace(FD, 0); 11503 unsigned ParamPos = ParamPositions.size(); 11504 for (const ParmVarDecl *P : FD->parameters()) { 11505 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11506 ++ParamPos; 11507 } 11508 while (FD) { 11509 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11510 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11511 // Mark uniform parameters. 11512 for (const Expr *E : Attr->uniforms()) { 11513 E = E->IgnoreParenImpCasts(); 11514 unsigned Pos; 11515 if (isa<CXXThisExpr>(E)) { 11516 Pos = ParamPositions[FD]; 11517 } else { 11518 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11519 ->getCanonicalDecl(); 11520 Pos = ParamPositions[PVD]; 11521 } 11522 ParamAttrs[Pos].Kind = Uniform; 11523 } 11524 // Get alignment info. 
11525 auto NI = Attr->alignments_begin(); 11526 for (const Expr *E : Attr->aligneds()) { 11527 E = E->IgnoreParenImpCasts(); 11528 unsigned Pos; 11529 QualType ParmTy; 11530 if (isa<CXXThisExpr>(E)) { 11531 Pos = ParamPositions[FD]; 11532 ParmTy = E->getType(); 11533 } else { 11534 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11535 ->getCanonicalDecl(); 11536 Pos = ParamPositions[PVD]; 11537 ParmTy = PVD->getType(); 11538 } 11539 ParamAttrs[Pos].Alignment = 11540 (*NI) 11541 ? (*NI)->EvaluateKnownConstInt(C) 11542 : llvm::APSInt::getUnsigned( 11543 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11544 .getQuantity()); 11545 ++NI; 11546 } 11547 // Mark linear parameters. 11548 auto SI = Attr->steps_begin(); 11549 auto MI = Attr->modifiers_begin(); 11550 for (const Expr *E : Attr->linears()) { 11551 E = E->IgnoreParenImpCasts(); 11552 unsigned Pos; 11553 // Rescaling factor needed to compute the linear parameter 11554 // value in the mangled name. 11555 unsigned PtrRescalingFactor = 1; 11556 if (isa<CXXThisExpr>(E)) { 11557 Pos = ParamPositions[FD]; 11558 } else { 11559 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11560 ->getCanonicalDecl(); 11561 Pos = ParamPositions[PVD]; 11562 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11563 PtrRescalingFactor = CGM.getContext() 11564 .getTypeSizeInChars(P->getPointeeType()) 11565 .getQuantity(); 11566 } 11567 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11568 ParamAttr.Kind = Linear; 11569 // Assuming a stride of 1, for `linear` without modifiers. 
11570 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11571 if (*SI) { 11572 Expr::EvalResult Result; 11573 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11574 if (const auto *DRE = 11575 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11576 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11577 ParamAttr.Kind = LinearWithVarStride; 11578 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11579 ParamPositions[StridePVD->getCanonicalDecl()]); 11580 } 11581 } 11582 } else { 11583 ParamAttr.StrideOrArg = Result.Val.getInt(); 11584 } 11585 } 11586 // If we are using a linear clause on a pointer, we need to 11587 // rescale the value of linear_step with the byte size of the 11588 // pointee type. 11589 if (Linear == ParamAttr.Kind) 11590 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11591 ++SI; 11592 ++MI; 11593 } 11594 llvm::APSInt VLENVal; 11595 SourceLocation ExprLoc; 11596 const Expr *VLENExpr = Attr->getSimdlen(); 11597 if (VLENExpr) { 11598 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11599 ExprLoc = VLENExpr->getExprLoc(); 11600 } 11601 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11602 if (CGM.getTriple().isX86()) { 11603 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11604 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11605 unsigned VLEN = VLENVal.getExtValue(); 11606 StringRef MangledName = Fn->getName(); 11607 if (CGM.getTarget().hasFeature("sve")) 11608 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11609 MangledName, 's', 128, Fn, ExprLoc); 11610 if (CGM.getTarget().hasFeature("neon")) 11611 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11612 MangledName, 'n', 128, Fn, ExprLoc); 11613 } 11614 } 11615 FD = FD->getPreviousDecl(); 11616 } 11617 } 11618 11619 namespace { 11620 /// Cleanup action for doacross support. 
11621 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11622 public: 11623 static const int DoacrossFinArgs = 2; 11624 11625 private: 11626 llvm::FunctionCallee RTLFn; 11627 llvm::Value *Args[DoacrossFinArgs]; 11628 11629 public: 11630 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11631 ArrayRef<llvm::Value *> CallArgs) 11632 : RTLFn(RTLFn) { 11633 assert(CallArgs.size() == DoacrossFinArgs); 11634 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11635 } 11636 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11637 if (!CGF.HaveInsertPoint()) 11638 return; 11639 CGF.EmitRuntimeCall(RTLFn, Args); 11640 } 11641 }; 11642 } // namespace 11643 11644 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11645 const OMPLoopDirective &D, 11646 ArrayRef<Expr *> NumIterations) { 11647 if (!CGF.HaveInsertPoint()) 11648 return; 11649 11650 ASTContext &C = CGM.getContext(); 11651 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11652 RecordDecl *RD; 11653 if (KmpDimTy.isNull()) { 11654 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11655 // kmp_int64 lo; // lower 11656 // kmp_int64 up; // upper 11657 // kmp_int64 st; // stride 11658 // }; 11659 RD = C.buildImplicitRecord("kmp_dim"); 11660 RD->startDefinition(); 11661 addFieldToRecordDecl(C, RD, Int64Ty); 11662 addFieldToRecordDecl(C, RD, Int64Ty); 11663 addFieldToRecordDecl(C, RD, Int64Ty); 11664 RD->completeDefinition(); 11665 KmpDimTy = C.getRecordType(RD); 11666 } else { 11667 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11668 } 11669 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11670 QualType ArrayTy = 11671 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11672 11673 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11674 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11675 enum { LowerFD = 0, UpperFD, StrideFD }; 11676 // Fill dims with data. 
11677 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11678 LValue DimsLVal = CGF.MakeAddrLValue( 11679 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11680 // dims.upper = num_iterations; 11681 LValue UpperLVal = CGF.EmitLValueForField( 11682 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11683 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11684 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11685 Int64Ty, NumIterations[I]->getExprLoc()); 11686 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11687 // dims.stride = 1; 11688 LValue StrideLVal = CGF.EmitLValueForField( 11689 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11690 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11691 StrideLVal); 11692 } 11693 11694 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11695 // kmp_int32 num_dims, struct kmp_dim * dims); 11696 llvm::Value *Args[] = { 11697 emitUpdateLocation(CGF, D.getBeginLoc()), 11698 getThreadID(CGF, D.getBeginLoc()), 11699 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11700 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11701 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11702 CGM.VoidPtrTy)}; 11703 11704 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11705 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11706 CGF.EmitRuntimeCall(RTLFn, Args); 11707 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11708 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11709 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11710 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11711 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11712 llvm::makeArrayRef(FiniArgs)); 11713 } 11714 11715 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11716 const OMPDependClause *C) { 11717 QualType Int64Ty = 11718 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11719 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11720 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11721 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11722 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11723 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11724 const Expr *CounterVal = C->getLoopData(I); 11725 assert(CounterVal); 11726 llvm::Value *CntVal = CGF.EmitScalarConversion( 11727 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11728 CounterVal->getExprLoc()); 11729 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11730 /*Volatile=*/false, Int64Ty); 11731 } 11732 llvm::Value *Args[] = { 11733 emitUpdateLocation(CGF, C->getBeginLoc()), 11734 getThreadID(CGF, C->getBeginLoc()), 11735 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11736 llvm::FunctionCallee RTLFn; 11737 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11738 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11739 OMPRTL___kmpc_doacross_post); 11740 } else { 11741 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11742 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11743 OMPRTL___kmpc_doacross_wait); 11744 } 11745 CGF.EmitRuntimeCall(RTLFn, Args); 11746 } 11747 11748 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11749 llvm::FunctionCallee Callee, 11750 ArrayRef<llvm::Value *> Args) const { 11751 assert(Loc.isValid() && "Outlined function call location must be valid."); 11752 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11753 11754 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11755 if (Fn->doesNotThrow()) { 11756 CGF.EmitNounwindRuntimeCall(Fn, Args); 11757 return; 11758 } 11759 } 11760 CGF.EmitRuntimeCall(Callee, Args); 11761 } 11762 11763 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11764 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11765 ArrayRef<llvm::Value *> Args) const { 11766 emitCall(CGF, Loc, OutlinedFn, Args); 11767 } 11768 11769 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11770 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11771 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11772 HasEmittedDeclareTargetRegion = true; 11773 } 11774 11775 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11776 const VarDecl *NativeParam, 11777 const VarDecl *TargetParam) const { 11778 return CGF.GetAddrOfLocalVar(NativeParam); 11779 } 11780 11781 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11782 const VarDecl *VD) { 11783 if (!VD) 11784 return Address::invalid(); 11785 Address UntiedAddr = Address::invalid(); 11786 Address UntiedRealAddr = Address::invalid(); 11787 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11788 if (It != FunctionToUntiedTaskStackMap.end()) { 11789 const UntiedLocalVarsAddressesMap &UntiedData = 11790 UntiedLocalVarsStack[It->second]; 11791 auto I = UntiedData.find(VD); 11792 if (I != UntiedData.end()) { 11793 UntiedAddr = I->second.first; 11794 UntiedRealAddr = I->second.second; 11795 } 11796 } 11797 const VarDecl *CVD = VD->getCanonicalDecl(); 11798 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11799 // Use the default allocation. 
11800 if (!isAllocatableDecl(VD)) 11801 return UntiedAddr; 11802 llvm::Value *Size; 11803 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11804 if (CVD->getType()->isVariablyModifiedType()) { 11805 Size = CGF.getTypeSize(CVD->getType()); 11806 // Align the size: ((size + align - 1) / align) * align 11807 Size = CGF.Builder.CreateNUWAdd( 11808 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11809 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11810 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11811 } else { 11812 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11813 Size = CGM.getSize(Sz.alignTo(Align)); 11814 } 11815 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11816 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11817 assert(AA->getAllocator() && 11818 "Expected allocator expression for non-default allocator."); 11819 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11820 // According to the standard, the original allocator type is a enum 11821 // (integer). Convert to pointer type, if required. 
11822 Allocator = CGF.EmitScalarConversion( 11823 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 11824 AA->getAllocator()->getExprLoc()); 11825 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11826 11827 llvm::Value *Addr = 11828 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11829 CGM.getModule(), OMPRTL___kmpc_alloc), 11830 Args, getName({CVD->getName(), ".void.addr"})); 11831 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11832 CGM.getModule(), OMPRTL___kmpc_free); 11833 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11834 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11835 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11836 if (UntiedAddr.isValid()) 11837 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11838 11839 // Cleanup action for allocate support. 11840 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11841 llvm::FunctionCallee RTLFn; 11842 unsigned LocEncoding; 11843 Address Addr; 11844 const Expr *Allocator; 11845 11846 public: 11847 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, 11848 Address Addr, const Expr *Allocator) 11849 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11850 Allocator(Allocator) {} 11851 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11852 if (!CGF.HaveInsertPoint()) 11853 return; 11854 llvm::Value *Args[3]; 11855 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 11856 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 11857 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11858 Addr.getPointer(), CGF.VoidPtrTy); 11859 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 11860 // According to the standard, the original allocator type is a enum 11861 // (integer). Convert to pointer type, if required. 
11862 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 11863 CGF.getContext().VoidPtrTy, 11864 Allocator->getExprLoc()); 11865 Args[2] = AllocVal; 11866 11867 CGF.EmitRuntimeCall(RTLFn, Args); 11868 } 11869 }; 11870 Address VDAddr = 11871 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 11872 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11873 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11874 VDAddr, AA->getAllocator()); 11875 if (UntiedRealAddr.isValid()) 11876 if (auto *Region = 11877 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11878 Region->emitUntiedSwitch(CGF); 11879 return VDAddr; 11880 } 11881 return UntiedAddr; 11882 } 11883 11884 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11885 const VarDecl *VD) const { 11886 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11887 if (It == FunctionToUntiedTaskStackMap.end()) 11888 return false; 11889 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11890 } 11891 11892 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11893 CodeGenModule &CGM, const OMPLoopDirective &S) 11894 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11895 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11896 if (!NeedToPush) 11897 return; 11898 NontemporalDeclsSet &DS = 11899 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11900 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11901 for (const Stmt *Ref : C->private_refs()) { 11902 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11903 const ValueDecl *VD; 11904 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11905 VD = DRE->getDecl(); 11906 } else { 11907 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11908 assert((ME->isImplicitCXXThis() || 11909 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11910 "Expected member of current class."); 11911 VD = 
ME->getMemberDecl(); 11912 } 11913 DS.insert(VD); 11914 } 11915 } 11916 } 11917 11918 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11919 if (!NeedToPush) 11920 return; 11921 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11922 } 11923 11924 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 11925 CodeGenFunction &CGF, 11926 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 11927 std::pair<Address, Address>> &LocalVars) 11928 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 11929 if (!NeedToPush) 11930 return; 11931 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 11932 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 11933 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 11934 } 11935 11936 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 11937 if (!NeedToPush) 11938 return; 11939 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 11940 } 11941 11942 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11943 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11944 11945 return llvm::any_of( 11946 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11947 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11948 } 11949 11950 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11951 const OMPExecutableDirective &S, 11952 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11953 const { 11954 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11955 // Vars in target/task regions must be excluded completely. 
11956 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11957 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11958 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11959 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11960 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11961 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11962 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11963 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11964 } 11965 } 11966 // Exclude vars in private clauses. 11967 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11968 for (const Expr *Ref : C->varlists()) { 11969 if (!Ref->getType()->isScalarType()) 11970 continue; 11971 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11972 if (!DRE) 11973 continue; 11974 NeedToCheckForLPCs.insert(DRE->getDecl()); 11975 } 11976 } 11977 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11978 for (const Expr *Ref : C->varlists()) { 11979 if (!Ref->getType()->isScalarType()) 11980 continue; 11981 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11982 if (!DRE) 11983 continue; 11984 NeedToCheckForLPCs.insert(DRE->getDecl()); 11985 } 11986 } 11987 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11988 for (const Expr *Ref : C->varlists()) { 11989 if (!Ref->getType()->isScalarType()) 11990 continue; 11991 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11992 if (!DRE) 11993 continue; 11994 NeedToCheckForLPCs.insert(DRE->getDecl()); 11995 } 11996 } 11997 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11998 for (const Expr *Ref : C->varlists()) { 11999 if (!Ref->getType()->isScalarType()) 12000 continue; 12001 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12002 if (!DRE) 12003 continue; 12004 NeedToCheckForLPCs.insert(DRE->getDecl()); 12005 } 12006 } 12007 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12008 for (const Expr *Ref : C->varlists()) { 12009 if (!Ref->getType()->isScalarType()) 12010 continue; 12011 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12012 if (!DRE) 12013 continue; 12014 NeedToCheckForLPCs.insert(DRE->getDecl()); 12015 } 12016 } 12017 for (const Decl *VD : NeedToCheckForLPCs) { 12018 for (const LastprivateConditionalData &Data : 12019 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12020 if (Data.DeclToUniqueName.count(VD) > 0) { 12021 if (!Data.Disabled) 12022 NeedToAddForLPCsAsDisabled.insert(VD); 12023 break; 12024 } 12025 } 12026 } 12027 } 12028 12029 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12030 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12031 : CGM(CGF.CGM), 12032 Action((CGM.getLangOpts().OpenMP >= 50 && 12033 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12034 [](const OMPLastprivateClause *C) { 12035 return C->getKind() == 12036 OMPC_LASTPRIVATE_conditional; 12037 })) 12038 ? 
ActionToDo::PushAsLastprivateConditional 12039 : ActionToDo::DoNotPush) { 12040 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12041 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12042 return; 12043 assert(Action == ActionToDo::PushAsLastprivateConditional && 12044 "Expected a push action."); 12045 LastprivateConditionalData &Data = 12046 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12047 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12048 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12049 continue; 12050 12051 for (const Expr *Ref : C->varlists()) { 12052 Data.DeclToUniqueName.insert(std::make_pair( 12053 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12054 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12055 } 12056 } 12057 Data.IVLVal = IVLVal; 12058 Data.Fn = CGF.CurFn; 12059 } 12060 12061 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12062 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12063 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12064 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12065 if (CGM.getLangOpts().OpenMP < 50) 12066 return; 12067 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12068 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12069 if (!NeedToAddForLPCsAsDisabled.empty()) { 12070 Action = ActionToDo::DisableLastprivateConditional; 12071 LastprivateConditionalData &Data = 12072 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12073 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12074 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12075 Data.Fn = CGF.CurFn; 12076 Data.Disabled = true; 12077 } 12078 } 12079 12080 CGOpenMPRuntime::LastprivateConditionalRAII 12081 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12082 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12083 return 
LastprivateConditionalRAII(CGF, S); 12084 } 12085 12086 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12087 if (CGM.getLangOpts().OpenMP < 50) 12088 return; 12089 if (Action == ActionToDo::DisableLastprivateConditional) { 12090 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12091 "Expected list of disabled private vars."); 12092 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12093 } 12094 if (Action == ActionToDo::PushAsLastprivateConditional) { 12095 assert( 12096 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12097 "Expected list of lastprivate conditional vars."); 12098 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12099 } 12100 } 12101 12102 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12103 const VarDecl *VD) { 12104 ASTContext &C = CGM.getContext(); 12105 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12106 if (I == LastprivateConditionalToTypes.end()) 12107 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12108 QualType NewType; 12109 const FieldDecl *VDField; 12110 const FieldDecl *FiredField; 12111 LValue BaseLVal; 12112 auto VI = I->getSecond().find(VD); 12113 if (VI == I->getSecond().end()) { 12114 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12115 RD->startDefinition(); 12116 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12117 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12118 RD->completeDefinition(); 12119 NewType = C.getRecordType(RD); 12120 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12121 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12122 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12123 } else { 12124 NewType = std::get<0>(VI->getSecond()); 12125 VDField = std::get<1>(VI->getSecond()); 12126 FiredField = std::get<2>(VI->getSecond()); 
12127 BaseLVal = std::get<3>(VI->getSecond()); 12128 } 12129 LValue FiredLVal = 12130 CGF.EmitLValueForField(BaseLVal, FiredField); 12131 CGF.EmitStoreOfScalar( 12132 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12133 FiredLVal); 12134 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12135 } 12136 12137 namespace { 12138 /// Checks if the lastprivate conditional variable is referenced in LHS. 12139 class LastprivateConditionalRefChecker final 12140 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12141 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12142 const Expr *FoundE = nullptr; 12143 const Decl *FoundD = nullptr; 12144 StringRef UniqueDeclName; 12145 LValue IVLVal; 12146 llvm::Function *FoundFn = nullptr; 12147 SourceLocation Loc; 12148 12149 public: 12150 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12151 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12152 llvm::reverse(LPM)) { 12153 auto It = D.DeclToUniqueName.find(E->getDecl()); 12154 if (It == D.DeclToUniqueName.end()) 12155 continue; 12156 if (D.Disabled) 12157 return false; 12158 FoundE = E; 12159 FoundD = E->getDecl()->getCanonicalDecl(); 12160 UniqueDeclName = It->second; 12161 IVLVal = D.IVLVal; 12162 FoundFn = D.Fn; 12163 break; 12164 } 12165 return FoundE == E; 12166 } 12167 bool VisitMemberExpr(const MemberExpr *E) { 12168 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12169 return false; 12170 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12171 llvm::reverse(LPM)) { 12172 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12173 if (It == D.DeclToUniqueName.end()) 12174 continue; 12175 if (D.Disabled) 12176 return false; 12177 FoundE = E; 12178 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12179 UniqueDeclName = It->second; 12180 IVLVal = D.IVLVal; 12181 FoundFn = D.Fn; 12182 break; 12183 } 12184 return FoundE == E; 12185 } 12186 bool VisitStmt(const Stmt *S) { 12187 for (const Stmt 
*Child : S->children()) { 12188 if (!Child) 12189 continue; 12190 if (const auto *E = dyn_cast<Expr>(Child)) 12191 if (!E->isGLValue()) 12192 continue; 12193 if (Visit(Child)) 12194 return true; 12195 } 12196 return false; 12197 } 12198 explicit LastprivateConditionalRefChecker( 12199 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12200 : LPM(LPM) {} 12201 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12202 getFoundData() const { 12203 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12204 } 12205 }; 12206 } // namespace 12207 12208 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12209 LValue IVLVal, 12210 StringRef UniqueDeclName, 12211 LValue LVal, 12212 SourceLocation Loc) { 12213 // Last updated loop counter for the lastprivate conditional var. 12214 // int<xx> last_iv = 0; 12215 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12216 llvm::Constant *LastIV = 12217 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12218 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12219 IVLVal.getAlignment().getAsAlign()); 12220 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12221 12222 // Last value of the lastprivate conditional. 12223 // decltype(priv_a) last_a; 12224 llvm::Constant *Last = getOrCreateInternalVariable( 12225 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12226 cast<llvm::GlobalVariable>(Last)->setAlignment( 12227 LVal.getAlignment().getAsAlign()); 12228 LValue LastLVal = 12229 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12230 12231 // Global loop counter. Required to handle inner parallel-for regions. 
12232 // iv 12233 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12234 12235 // #pragma omp critical(a) 12236 // if (last_iv <= iv) { 12237 // last_iv = iv; 12238 // last_a = priv_a; 12239 // } 12240 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12241 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12242 Action.Enter(CGF); 12243 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12244 // (last_iv <= iv) ? Check if the variable is updated and store new 12245 // value in global var. 12246 llvm::Value *CmpRes; 12247 if (IVLVal.getType()->isSignedIntegerType()) { 12248 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12249 } else { 12250 assert(IVLVal.getType()->isUnsignedIntegerType() && 12251 "Loop iteration variable must be integer."); 12252 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12253 } 12254 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12255 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12256 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12257 // { 12258 CGF.EmitBlock(ThenBB); 12259 12260 // last_iv = iv; 12261 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12262 12263 // last_a = priv_a; 12264 switch (CGF.getEvaluationKind(LVal.getType())) { 12265 case TEK_Scalar: { 12266 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12267 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12268 break; 12269 } 12270 case TEK_Complex: { 12271 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12272 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12273 break; 12274 } 12275 case TEK_Aggregate: 12276 llvm_unreachable( 12277 "Aggregates are not supported in lastprivate conditional."); 12278 } 12279 // } 12280 CGF.EmitBranch(ExitBB); 12281 // There is no need to emit line number for unconditional branch. 
12282 (void)ApplyDebugLocation::CreateEmpty(CGF); 12283 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12284 }; 12285 12286 if (CGM.getLangOpts().OpenMPSimd) { 12287 // Do not emit as a critical region as no parallel region could be emitted. 12288 RegionCodeGenTy ThenRCG(CodeGen); 12289 ThenRCG(CGF); 12290 } else { 12291 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12292 } 12293 } 12294 12295 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12296 const Expr *LHS) { 12297 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12298 return; 12299 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12300 if (!Checker.Visit(LHS)) 12301 return; 12302 const Expr *FoundE; 12303 const Decl *FoundD; 12304 StringRef UniqueDeclName; 12305 LValue IVLVal; 12306 llvm::Function *FoundFn; 12307 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12308 Checker.getFoundData(); 12309 if (FoundFn != CGF.CurFn) { 12310 // Special codegen for inner parallel regions. 
12311 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12312 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12313 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12314 "Lastprivate conditional is not found in outer region."); 12315 QualType StructTy = std::get<0>(It->getSecond()); 12316 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12317 LValue PrivLVal = CGF.EmitLValue(FoundE); 12318 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12319 PrivLVal.getAddress(CGF), 12320 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12321 LValue BaseLVal = 12322 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12323 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12324 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12325 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12326 FiredLVal, llvm::AtomicOrdering::Unordered, 12327 /*IsVolatile=*/true, /*isInit=*/false); 12328 return; 12329 } 12330 12331 // Private address of the lastprivate conditional in the current context. 
12332 // priv_a 12333 LValue LVal = CGF.EmitLValue(FoundE); 12334 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12335 FoundE->getExprLoc()); 12336 } 12337 12338 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12339 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12340 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12341 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12342 return; 12343 auto Range = llvm::reverse(LastprivateConditionalStack); 12344 auto It = llvm::find_if( 12345 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12346 if (It == Range.end() || It->Fn != CGF.CurFn) 12347 return; 12348 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12349 assert(LPCI != LastprivateConditionalToTypes.end() && 12350 "Lastprivates must be registered already."); 12351 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12352 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12353 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12354 for (const auto &Pair : It->DeclToUniqueName) { 12355 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12356 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12357 continue; 12358 auto I = LPCI->getSecond().find(Pair.first); 12359 assert(I != LPCI->getSecond().end() && 12360 "Lastprivate must be rehistered already."); 12361 // bool Cmp = priv_a.Fired != 0; 12362 LValue BaseLVal = std::get<3>(I->getSecond()); 12363 LValue FiredLVal = 12364 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12365 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12366 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12367 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12368 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12369 // if (Cmp) { 12370 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12371 CGF.EmitBlock(ThenBB); 
12372 Address Addr = CGF.GetAddrOfLocalVar(VD); 12373 LValue LVal; 12374 if (VD->getType()->isReferenceType()) 12375 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12376 AlignmentSource::Decl); 12377 else 12378 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12379 AlignmentSource::Decl); 12380 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12381 D.getBeginLoc()); 12382 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12383 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12384 // } 12385 } 12386 } 12387 12388 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12389 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12390 SourceLocation Loc) { 12391 if (CGF.getLangOpts().OpenMP < 50) 12392 return; 12393 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12394 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12395 "Unknown lastprivate conditional variable."); 12396 StringRef UniqueName = It->second; 12397 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12398 // The variable was not updated in the region - exit. 
12399 if (!GV) 12400 return; 12401 LValue LPLVal = CGF.MakeAddrLValue( 12402 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12403 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12404 CGF.EmitStoreOfScalar(Res, PrivLVal); 12405 } 12406 12407 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12408 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12409 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12410 llvm_unreachable("Not supported in SIMD-only mode"); 12411 } 12412 12413 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12414 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12415 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12416 llvm_unreachable("Not supported in SIMD-only mode"); 12417 } 12418 12419 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12420 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12421 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12422 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12423 bool Tied, unsigned &NumberOfParts) { 12424 llvm_unreachable("Not supported in SIMD-only mode"); 12425 } 12426 12427 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12428 SourceLocation Loc, 12429 llvm::Function *OutlinedFn, 12430 ArrayRef<llvm::Value *> CapturedVars, 12431 const Expr *IfCond) { 12432 llvm_unreachable("Not supported in SIMD-only mode"); 12433 } 12434 12435 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12436 CodeGenFunction &CGF, StringRef CriticalName, 12437 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12438 const Expr *Hint) { 12439 llvm_unreachable("Not supported in SIMD-only mode"); 12440 } 12441 12442 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12443 const RegionCodeGenTy &MasterOpGen, 12444 SourceLocation Loc) { 12445 llvm_unreachable("Not supported in SIMD-only mode"); 12446 } 12447 12448 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12449 SourceLocation Loc) { 12450 llvm_unreachable("Not supported in SIMD-only mode"); 12451 } 12452 12453 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12454 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12455 SourceLocation Loc) { 12456 llvm_unreachable("Not supported in SIMD-only mode"); 12457 } 12458 12459 void CGOpenMPSIMDRuntime::emitSingleRegion( 12460 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12461 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12462 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12463 ArrayRef<const Expr *> AssignmentOps) { 12464 llvm_unreachable("Not supported in SIMD-only mode"); 12465 } 12466 12467 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12468 const RegionCodeGenTy &OrderedOpGen, 12469 SourceLocation Loc, 12470 bool IsThreads) { 12471 llvm_unreachable("Not supported in SIMD-only mode"); 12472 } 12473 12474 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12475 SourceLocation Loc, 12476 OpenMPDirectiveKind Kind, 12477 bool EmitChecks, 12478 bool ForceSimpleCall) { 12479 llvm_unreachable("Not supported in SIMD-only mode"); 12480 } 12481 12482 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12483 CodeGenFunction &CGF, SourceLocation Loc, 12484 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12485 bool Ordered, const DispatchRTInput &DispatchValues) { 12486 llvm_unreachable("Not supported in SIMD-only mode"); 12487 } 12488 12489 void CGOpenMPSIMDRuntime::emitForStaticInit( 12490 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12491 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12492 llvm_unreachable("Not supported in SIMD-only mode"); 12493 } 12494 12495 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12496 CodeGenFunction &CGF, SourceLocation Loc, 12497 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12498 llvm_unreachable("Not supported in SIMD-only mode"); 12499 } 12500 12501 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12502 SourceLocation Loc, 12503 unsigned IVSize, 12504 bool IVSigned) { 12505 llvm_unreachable("Not supported in SIMD-only mode"); 12506 } 12507 12508 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12509 SourceLocation Loc, 12510 OpenMPDirectiveKind DKind) { 12511 llvm_unreachable("Not supported in SIMD-only mode"); 12512 } 12513 12514 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12515 SourceLocation Loc, 12516 unsigned IVSize, bool IVSigned, 12517 Address IL, Address LB, 12518 Address UB, Address ST) { 12519 llvm_unreachable("Not supported in SIMD-only mode"); 12520 } 12521 12522 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12523 llvm::Value *NumThreads, 12524 SourceLocation Loc) { 12525 llvm_unreachable("Not supported in SIMD-only mode"); 12526 } 12527 12528 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12529 ProcBindKind ProcBind, 12530 SourceLocation Loc) { 12531 llvm_unreachable("Not supported in SIMD-only mode"); 12532 } 12533 12534 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12535 const VarDecl *VD, 12536 Address VDAddr, 12537 SourceLocation Loc) { 12538 llvm_unreachable("Not supported in SIMD-only mode"); 12539 } 12540 12541 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12542 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12543 CodeGenFunction *CGF) { 12544 llvm_unreachable("Not supported in SIMD-only mode"); 12545 } 12546 12547 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12548 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12549 llvm_unreachable("Not supported in SIMD-only mode"); 12550 } 12551 12552 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12553 ArrayRef<const Expr *> 
Vars, 12554 SourceLocation Loc, 12555 llvm::AtomicOrdering AO) { 12556 llvm_unreachable("Not supported in SIMD-only mode"); 12557 } 12558 12559 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12560 const OMPExecutableDirective &D, 12561 llvm::Function *TaskFunction, 12562 QualType SharedsTy, Address Shareds, 12563 const Expr *IfCond, 12564 const OMPTaskDataTy &Data) { 12565 llvm_unreachable("Not supported in SIMD-only mode"); 12566 } 12567 12568 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12569 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12570 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12571 const Expr *IfCond, const OMPTaskDataTy &Data) { 12572 llvm_unreachable("Not supported in SIMD-only mode"); 12573 } 12574 12575 void CGOpenMPSIMDRuntime::emitReduction( 12576 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12577 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12578 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12579 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12580 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12581 ReductionOps, Options); 12582 } 12583 12584 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12585 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12586 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12587 llvm_unreachable("Not supported in SIMD-only mode"); 12588 } 12589 12590 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12591 SourceLocation Loc, 12592 bool IsWorksharingReduction) { 12593 llvm_unreachable("Not supported in SIMD-only mode"); 12594 } 12595 12596 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12597 SourceLocation Loc, 12598 ReductionCodeGen &RCG, 12599 unsigned N) { 12600 llvm_unreachable("Not supported in SIMD-only mode"); 12601 } 12602 
12603 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12604 SourceLocation Loc, 12605 llvm::Value *ReductionsPtr, 12606 LValue SharedLVal) { 12607 llvm_unreachable("Not supported in SIMD-only mode"); 12608 } 12609 12610 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12611 SourceLocation Loc) { 12612 llvm_unreachable("Not supported in SIMD-only mode"); 12613 } 12614 12615 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12616 CodeGenFunction &CGF, SourceLocation Loc, 12617 OpenMPDirectiveKind CancelRegion) { 12618 llvm_unreachable("Not supported in SIMD-only mode"); 12619 } 12620 12621 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12622 SourceLocation Loc, const Expr *IfCond, 12623 OpenMPDirectiveKind CancelRegion) { 12624 llvm_unreachable("Not supported in SIMD-only mode"); 12625 } 12626 12627 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12628 const OMPExecutableDirective &D, StringRef ParentName, 12629 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12630 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12631 llvm_unreachable("Not supported in SIMD-only mode"); 12632 } 12633 12634 void CGOpenMPSIMDRuntime::emitTargetCall( 12635 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12636 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12637 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12638 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12639 const OMPLoopDirective &D)> 12640 SizeEmitter) { 12641 llvm_unreachable("Not supported in SIMD-only mode"); 12642 } 12643 12644 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12645 llvm_unreachable("Not supported in SIMD-only mode"); 12646 } 12647 12648 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12649 llvm_unreachable("Not supported in SIMD-only mode"); 12650 } 12651 12652 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12653 return false; 12654 } 12655 12656 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12657 const OMPExecutableDirective &D, 12658 SourceLocation Loc, 12659 llvm::Function *OutlinedFn, 12660 ArrayRef<llvm::Value *> CapturedVars) { 12661 llvm_unreachable("Not supported in SIMD-only mode"); 12662 } 12663 12664 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12665 const Expr *NumTeams, 12666 const Expr *ThreadLimit, 12667 SourceLocation Loc) { 12668 llvm_unreachable("Not supported in SIMD-only mode"); 12669 } 12670 12671 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12672 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12673 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12674 llvm_unreachable("Not supported in SIMD-only mode"); 12675 } 12676 12677 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12678 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12679 const Expr *Device) { 12680 llvm_unreachable("Not supported in SIMD-only mode"); 12681 } 12682 12683 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12684 const OMPLoopDirective &D, 12685 ArrayRef<Expr *> NumIterations) { 12686 llvm_unreachable("Not supported in SIMD-only mode"); 12687 } 12688 12689 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12690 const OMPDependClause *C) { 12691 llvm_unreachable("Not supported in SIMD-only mode"); 12692 } 12693 12694 const VarDecl * 12695 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12696 const VarDecl *NativeParam) const { 12697 llvm_unreachable("Not supported in SIMD-only mode"); 12698 } 12699 12700 Address 12701 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12702 const VarDecl *NativeParam, 12703 const VarDecl *TargetParam) const { 12704 llvm_unreachable("Not supported in SIMD-only mode"); 12705 } 12706