//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the dispatch switch for untied tasks: each potential
  /// resumption point becomes a switch case so an untied task can be restarted
  /// at the part it last yielded from.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id, run the user's untied codegen, then return
        // to the runtime; the new switch case resumes execution here.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing outlined region, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: the local OuterRegionInfo deliberately shadows the member so that
    // any non-region CGCapturedStmtInfo's helper name is used as well.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash lambda/block capture state so the inlined region does not see it.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Run Exit() even on exceptional paths via an EH cleanup.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Initialize \p Private with the initializer of the user-defined reduction
/// \p DRD, or with a null constant when the UDR has no initializer clause.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Remap the omp_priv/omp_orig placeholders to the private and original
    // addresses, then emit the initializer call.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  // Only array sections have a distinct upper-bound lvalue.
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  // All four arrays are parallel; walk them in lockstep.
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the size is a compile-time type size, no element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Size = (UB - LB) + 1 elements, then scale by the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count before
  // emitting the variably-modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointers/references in \p BaseLV until its type matches
/// \p ElTy, then return an lvalue of that element type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Build a chain of temporaries mirroring the pointer/reference levels of
/// \p BaseTy so that \p Addr can be reached through the original base type.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
  // Any exception escaping the structured block terminates the program.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is a plain local, not a pointer parameter.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an implicit, public, non-mutable field of type \p FieldTy and
/// append it to the declaration context \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only drop unused declarations; defined or referenced globals stay.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

// Join \p Parts with FirstSeparator before the first part and Separator
// between the rest (e.g. ".part1.part2").
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner (or initializer, for IsCombiner == false) of a
/// user-defined reduction as an internal function taking restrict-qualified
/// in/out pointers.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Small helper; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer without an explicit init expression, default-construct
  // omp_priv from its declared initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit each declare-reduction at most once.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only CallInit initializers pass the init expression through; direct
    // initialization is handled from Out's declared initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs were emitted for this function so they can be
    // cleaned up in functionFinished().
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Not emitted yet: emit lazily, then return the cached pair.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the captured statement of a 'parallel' or 'teams' construct into a
/// function named \p OutlinedHelperName taking the kmp_int32* thread id.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any directive kind that can carry 'cancel' does so.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task-like directive carries 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1388 if (AtCurrentPoint) { 1389 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1390 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1391 } else { 1392 Elem.second.ServiceInsertPt = 1393 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1394 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1395 } 1396 } 1397 1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1400 if (Elem.second.ServiceInsertPt) { 1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1402 Elem.second.ServiceInsertPt = nullptr; 1403 Ptr->eraseFromParent(); 1404 } 1405 } 1406 1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1408 SourceLocation Loc, 1409 SmallString<128> &Buffer) { 1410 llvm::raw_svector_ostream OS(Buffer); 1411 // Build debug location 1412 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1413 OS << ";" << PLoc.getFilename() << ";"; 1414 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1415 OS << FD->getQualifiedNameAsString(); 1416 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1417 return OS.str(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1421 SourceLocation Loc, 1422 unsigned Flags) { 1423 llvm::Constant *SrcLocStr; 1424 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1425 Loc.isInvalid()) { 1426 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1427 } else { 1428 std::string FunctionName = ""; 1429 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1430 FunctionName = FD->getQualifiedNameAsString(); 1431 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1432 const char *FileName = PLoc.getFilename(); 1433 unsigned Line = PLoc.getLine(); 1434 unsigned Column = PLoc.getColumn(); 1435 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1436 Line, Column); 1437 } 1438 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1439 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1440 Reserved2Flags); 1441 } 1442 1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1444 SourceLocation Loc) { 1445 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1446 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1447 // the clang invariants used below might be broken. 1448 if (CGM.getLangOpts().OpenMPIRBuilder) { 1449 SmallString<128> Buffer; 1450 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1451 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1452 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1453 return OMPBuilder.getOrCreateThreadID( 1454 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1455 } 1456 1457 llvm::Value *ThreadID = nullptr; 1458 // Check whether we've already cached a load of the thread id in this 1459 // function. 1460 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1461 if (I != OpenMPLocThreadIDMap.end()) { 1462 ThreadID = I->second.ThreadID; 1463 if (ThreadID != nullptr) 1464 return ThreadID; 1465 } 1466 // If exceptions are enabled, do not use parameter to avoid possible crash. 1467 if (auto *OMPRegionInfo = 1468 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1469 if (OMPRegionInfo->getThreadIDVariable()) { 1470 // Check if this an outlined function with thread id passed as argument. 
1471 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1472 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1473 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1474 !CGF.getLangOpts().CXXExceptions || 1475 CGF.Builder.GetInsertBlock() == TopBlock || 1476 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1477 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1478 TopBlock || 1479 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1480 CGF.Builder.GetInsertBlock()) { 1481 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1482 // If value loaded in entry block, cache it and use it everywhere in 1483 // function. 1484 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1485 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1486 Elem.second.ThreadID = ThreadID; 1487 } 1488 return ThreadID; 1489 } 1490 } 1491 } 1492 1493 // This is not an outlined function region - need to call __kmpc_int32 1494 // kmpc_global_thread_num(ident_t *loc). 1495 // Generate thread id value and cache this value for use across the 1496 // function. 
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Declare the __kmpc_for_static_init_{4,4u,8,8u} variant matching the
// induction variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Declare the __kmpc_dispatch_init_{4,4u,8,8u} variant matching the
// induction variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Declare the matching __kmpc_dispatch_fini_{4,4u,8,8u} variant.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Declare the matching __kmpc_dispatch_next_{4,4u,8,8u} variant.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'link' variables, or 'to' variables under unified shared memory,
  // are accessed through an indirection pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal-linkage variables may collide across TUs; disambiguate the
      // reference pointer's name with the ID of the declaring file.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Lazily create the pointer-to-VD global on first request.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host initializes the reference pointer to VD's address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return (creating on first use) the per-variable cache global passed to
/// __kmpc_threadprivate_cached for threadprivate variable \p VD.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // The cache is only meaningful on the runtime-call path, i.e. when native
  // TLS is not being used for threadprivates.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Emit the address of the current thread's copy of threadprivate variable
/// \p VD, whose master copy lives at \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the original address is already thread-local.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Build __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit the runtime calls that register the ctor/copy-ctor/dtor helpers of a
/// threadprivate variable located at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit (once per mangled name) the ctor/dtor helper functions for
/// threadprivate variable \p VD and register them with the runtime. When no
/// CodeGenFunction is supplied, a dedicated initialization function holding
/// the registration is created and returned; otherwise the registration is
/// emitted into \p CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is handled via native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the helpers twice.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // The helper receives the thread-local copy's address as a void*; cast
      // it to the variable's memory type and run the initializer on it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns its own void* argument.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing helpers are passed to the runtime as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: create a standalone initializer function
      // that performs the registration and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Register ctor/dtor offload entries for declare-target variable \p VD.
/// Returns CGM.getLangOpts().OpenMPIsDevice on the declare-target paths and
/// false when no offloading (device triples or device compilation) is
/// configured.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are reached via a
  // reference pointer instead and need no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial location for the generated body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even if nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, a private one-byte constant serves purely as the unique
      // ID for the offload-entry table.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: private constant used only as the entry's unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of an "artificial" threadprivate variable named
/// \p Name with element type \p VarType: backed by a TLS global when the
/// target supports it, otherwise routed through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With TLS support, simply mark the backing global thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise call __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else controlled by the OpenMP 'if' clause condition \p Cond,
/// running \p ThenGen when it holds and \p ElseGen otherwise. Constant
/// conditions are folded so only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit a 'parallel' region: a __kmpc_fork_call of \p OutlinedFn, or — when
/// \p IfCond is present and evaluates false — a serialized call bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined region, reuse the thread-ID parameter when available.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the global thread number into an i32 temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the module-level internal global with the given type and name,
/// creating it (zero-initialized, common linkage) on first request. Repeated
/// requests must use a consistent type (asserted).
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the named lock variable used by the runtime to guard the critical
/// section \p CriticalName.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs: calls one runtime
/// entry before the region and another after it, optionally guarding the
/// region body on the enter call's return value.
class CommonActionTy final : public PrePostActionTy {
  // Runtime entry (and args) invoked before the region body.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime entry (and args) invoked after the region body.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // When true, the body only runs if the enter call returned non-zero.
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit an OpenMP 'critical' region guarded by __kmpc_critical (or
/// __kmpc_critical_with_hint when a hint expression is present) and
/// __kmpc_end_critical.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is passed as an extra trailing i32 argument to
    // __kmpc_critical_with_hint.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit an OpenMP 'master' region: the body runs only when __kmpc_master
/// returns non-zero and is closed with __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield' construct, via the OMPIRBuilder when it is enabled or a
/// direct __kmpc_omp_taskyield call otherwise.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Inside an OpenMP region, let untied tasks resume after the yield point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit an OpenMP 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded pointer to the variable's converted memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the helper 'void copy_func(void *LHSArg, void *RHSArg)' used by
/// __kmpc_copyprivate: both arguments are arrays of void* addressing the
/// copyprivate variables, and each destination element is assigned from its
/// source element using the corresponding assignment expression.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit an OpenMP 'single' region and, when copyprivate clauses are present,
/// the did_it flag and __kmpc_copyprivate bookkeeping that broadcast the
/// executing thread's values to the rest of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
2432 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2433 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2434 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2435 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2436 Address CL = 2437 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2438 CGF.VoidPtrTy); 2439 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2440 llvm::Value *Args[] = { 2441 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2442 getThreadID(CGF, Loc), // i32 <gtid> 2443 BufSize, // size_t <buf_size> 2444 CL.getPointer(), // void *<copyprivate list> 2445 CpyFn, // void (*) (void *, void *) <copy_func> 2446 DidItVal // i32 did_it 2447 }; 2448 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2449 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2450 Args); 2451 } 2452 } 2453 2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2455 const RegionCodeGenTy &OrderedOpGen, 2456 SourceLocation Loc, bool IsThreads) { 2457 if (!CGF.HaveInsertPoint()) 2458 return; 2459 // __kmpc_ordered(ident_t *, gtid); 2460 // OrderedOpGen(); 2461 // __kmpc_end_ordered(ident_t *, gtid); 2462 // Prepare arguments and build a call to __kmpc_ordered 2463 if (IsThreads) { 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_ordered), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2470 Args); 2471 OrderedOpGen.setAction(Action); 2472 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2473 return; 2474 } 2475 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2476 } 2477 2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2479 unsigned Flags; 2480 if (Kind == OMPD_for) 2481 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2482 else if (Kind == OMPD_sections) 2483 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2484 else if (Kind == OMPD_single) 2485 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2486 else if (Kind == OMPD_barrier) 2487 Flags = OMP_IDENT_BARRIER_EXPL; 2488 else 2489 Flags = OMP_IDENT_BARRIER_IMPL; 2490 return Flags; 2491 } 2492 2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2494 CodeGenFunction &CGF, const OMPLoopDirective &S, 2495 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2496 // Check if the loop directive is actually a doacross loop directive. In this 2497 // case choose static, 1 schedule. 2498 if (llvm::any_of( 2499 S.getClausesOfKind<OMPOrderedClause>(), 2500 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2501 ScheduleKind = OMPC_SCHEDULE_static; 2502 // Chunk size is 1 in this case. 2503 llvm::APInt ChunkSize(32, 1); 2504 ChunkExpr = IntegerLiteral::Create( 2505 CGF.getContext(), ChunkSize, 2506 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2507 SourceLocation()); 2508 } 2509 } 2510 2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2512 OpenMPDirectiveKind Kind, bool EmitChecks, 2513 bool ForceSimpleCall) { 2514 // Check if we should use the OMPBuilder 2515 auto *OMPRegionInfo = 2516 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2517 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2518 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2519 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2520 return; 2521 } 2522 2523 if (!CGF.HaveInsertPoint()) 2524 return; 2525 // Build call __kmpc_cancel_barrier(loc, thread_id); 2526 // Build call __kmpc_barrier(loc, thread_id); 2527 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2528 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2529 // thread_id); 2530 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2531 getThreadID(CGF, Loc)}; 2532 if (OMPRegionInfo) { 2533 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2534 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2535 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2536 OMPRTL___kmpc_cancel_barrier), 2537 Args); 2538 if (EmitChecks) { 2539 // if (__kmpc_cancel_barrier()) { 2540 // exit from construct; 2541 // } 2542 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2543 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2544 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2545 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2546 CGF.EmitBlock(ExitBB); 2547 // exit from construct; 2548 CodeGenFunction::JumpDest CancelDestination = 2549 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2550 CGF.EmitBranchThroughCleanup(CancelDestination); 2551 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2552 } 2553 return; 2554 } 2555 } 2556 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2557 CGM.getModule(), OMPRTL___kmpc_barrier), 2558 Args); 2559 } 2560 2561 /// Map the OpenMP loop schedule to the runtime enumeration. 2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2563 bool Chunked, bool Ordered) { 2564 switch (ScheduleKind) { 2565 case OMPC_SCHEDULE_static: 2566 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2567 : (Ordered ? OMP_ord_static : OMP_sch_static); 2568 case OMPC_SCHEDULE_dynamic: 2569 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2570 case OMPC_SCHEDULE_guided: 2571 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2572 case OMPC_SCHEDULE_runtime: 2573 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2574 case OMPC_SCHEDULE_auto: 2575 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2576 case OMPC_SCHEDULE_unknown: 2577 assert(!Chunked && "chunk was specified but schedule kind not known"); 2578 return Ordered ? 
        OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

// Returns true iff the schedule maps to plain (unordered, non-chunked)
// static scheduling.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

// Same check for dist_schedule kinds.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

// Returns true iff the schedule maps to chunked static scheduling.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

// Same check for dist_schedule kinds.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

// Anything other than non-chunked static is treated as a dynamic schedule
// (requires the dispatch-based codegen path).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine the runtime schedule value with the monotonic/nonmonotonic
/// modifier bits derived from the schedule clause modifiers M1/M2. The simd
/// modifier may also rewrite a chunked static schedule into its balanced
/// variant. The result is the value passed as the 'schedtype' argument of the
/// runtime init calls.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

// Emits the __kmpc_dispatch_init call used for dynamic (non-static)
// schedules; static schedules must go through emitForStaticInit instead.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

// Shared helper for emitForStaticInit/emitDistributeStaticInit: builds the
// actual __kmpc_for_static_init call for a (necessarily static) schedule.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

// Emits the static-init runtime call for a worksharing loop or sections
// directive, tagging the location with a work-loop/work-sections flag.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ?
                                                        OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

// Static-init call for 'distribute' directives; dist_schedule has no
// monotonicity modifiers, so unknown modifiers are passed.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

// Closes a statically-scheduled worksharing region with
// __kmpc_for_static_fini, tagging the location by directive kind.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

// Signals the end of one iteration of an ordered dynamically-scheduled loop.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

// Fetches the next chunk of a dynamically-scheduled loop via
// __kmpc_dispatch_next; returns the call result converted to a bool
// (nonzero means more work is available).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

// Lowers a num_threads clause to __kmpc_push_num_threads.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

// Lowers a proc_bind clause to __kmpc_push_proc_bind.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

// Emits a 'flush' construct. The flushed-variable list is currently ignored;
// a full __kmpc_flush (or OMPIRBuilder flush) is emitted instead.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

// True when no offload entries (target regions or device globals) have been
// recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

// Registers (host) or completes (device) a target region entry identified by
// <DeviceID, FileID, ParentName, LineNum>.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

// Checks for an existing target region entry; unless IgnoreAddressId is set,
// an entry with an address or ID already attached counts as "not found".
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
2987 if (!IgnoreAddressId && 2988 (PerLine->second.getAddress() || PerLine->second.getID())) 2989 return false; 2990 return true; 2991 } 2992 2993 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2994 const OffloadTargetRegionEntryInfoActTy &Action) { 2995 // Scan all target region entries and perform the provided action. 2996 for (const auto &D : OffloadEntriesTargetRegion) 2997 for (const auto &F : D.second) 2998 for (const auto &P : F.second) 2999 for (const auto &L : P.second) 3000 Action(D.first, F.first, P.first(), L.first, L.second); 3001 } 3002 3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3004 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3005 OMPTargetGlobalVarEntryKind Flags, 3006 unsigned Order) { 3007 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3008 "only required for the device " 3009 "code generation."); 3010 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3011 ++OffloadingEntriesNum; 3012 } 3013 3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3015 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3016 CharUnits VarSize, 3017 OMPTargetGlobalVarEntryKind Flags, 3018 llvm::GlobalValue::LinkageTypes Linkage) { 3019 if (CGM.getLangOpts().OpenMPIsDevice) { 3020 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3021 assert(Entry.isValid() && Entry.getFlags() == Flags && 3022 "Entry not initialized!"); 3023 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3024 "Resetting with the new address."); 3025 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3026 if (Entry.getVarSize().isZero()) { 3027 Entry.setVarSize(VarSize); 3028 Entry.setLinkage(Linkage); 3029 } 3030 return; 3031 } 3032 Entry.setVarSize(VarSize); 3033 Entry.setLinkage(Linkage); 3034 Entry.setAddress(Addr); 3035 } else { 3036 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3037 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3038 
assert(Entry.isValid() && Entry.getFlags() == Flags && 3039 "Entry not initialized!"); 3040 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3041 "Resetting with the new address."); 3042 if (Entry.getVarSize().isZero()) { 3043 Entry.setVarSize(VarSize); 3044 Entry.setLinkage(Linkage); 3045 } 3046 return; 3047 } 3048 OffloadEntriesDeviceGlobalVar.try_emplace( 3049 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3050 ++OffloadingEntriesNum; 3051 } 3052 } 3053 3054 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3055 actOnDeviceGlobalVarEntriesInfo( 3056 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3057 // Scan all target region entries and perform the provided action. 3058 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3059 Action(E.getKey(), E.getValue()); 3060 } 3061 3062 void CGOpenMPRuntime::createOffloadEntry( 3063 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3064 llvm::GlobalValue::LinkageTypes Linkage) { 3065 StringRef Name = Addr->getName(); 3066 llvm::Module &M = CGM.getModule(); 3067 llvm::LLVMContext &C = M.getContext(); 3068 3069 // Create constant string with the name. 
3070 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3071 3072 std::string StringName = getName({"omp_offloading", "entry_name"}); 3073 auto *Str = new llvm::GlobalVariable( 3074 M, StrPtrInit->getType(), /*isConstant=*/true, 3075 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3076 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3077 3078 llvm::Constant *Data[] = { 3079 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3080 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3081 llvm::ConstantInt::get(CGM.SizeTy, Size), 3082 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3083 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3084 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3085 llvm::GlobalVariable *Entry = createGlobalStruct( 3086 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3087 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3088 3089 // The entry has to be created in the section the linker expects it to be. 3090 Entry->setSection("omp_offloading_entries"); 3091 } 3092 3093 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3094 // Emit the offloading entries and metadata so that the device codegen side 3095 // can easily figure out what to emit. The produced metadata looks like 3096 // this: 3097 // 3098 // !omp_offload.info = !{!1, ...} 3099 // 3100 // Right now we only generate metadata for function that contain target 3101 // regions. 3102 3103 // If we are in simd mode or there are no entries, we don't need to do 3104 // anything. 
3105 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3106 return; 3107 3108 llvm::Module &M = CGM.getModule(); 3109 llvm::LLVMContext &C = M.getContext(); 3110 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3111 SourceLocation, StringRef>, 3112 16> 3113 OrderedEntries(OffloadEntriesInfoManager.size()); 3114 llvm::SmallVector<StringRef, 16> ParentFunctions( 3115 OffloadEntriesInfoManager.size()); 3116 3117 // Auxiliary methods to create metadata values and strings. 3118 auto &&GetMDInt = [this](unsigned V) { 3119 return llvm::ConstantAsMetadata::get( 3120 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3121 }; 3122 3123 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3124 3125 // Create the offloading info metadata node. 3126 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3127 3128 // Create function that emits metadata for each target region entry; 3129 auto &&TargetRegionMetadataEmitter = 3130 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3131 &GetMDString]( 3132 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3133 unsigned Line, 3134 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3135 // Generate metadata for target regions. Each entry of this metadata 3136 // contains: 3137 // - Entry 0 -> Kind of this type of metadata (0). 3138 // - Entry 1 -> Device ID of the file where the entry was identified. 3139 // - Entry 2 -> File ID of the file where the entry was identified. 3140 // - Entry 3 -> Mangled name of the function where the entry was 3141 // identified. 3142 // - Entry 4 -> Line in the file where the entry was identified. 3143 // - Entry 5 -> Order the entry was created. 3144 // The first element of the metadata node is the kind. 
3145 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3146 GetMDInt(FileID), GetMDString(ParentName), 3147 GetMDInt(Line), GetMDInt(E.getOrder())}; 3148 3149 SourceLocation Loc; 3150 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3151 E = CGM.getContext().getSourceManager().fileinfo_end(); 3152 I != E; ++I) { 3153 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3154 I->getFirst()->getUniqueID().getFile() == FileID) { 3155 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3156 I->getFirst(), Line, 1); 3157 break; 3158 } 3159 } 3160 // Save this entry in the right position of the ordered entries array. 3161 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3162 ParentFunctions[E.getOrder()] = ParentName; 3163 3164 // Add metadata to the named metadata node. 3165 MD->addOperand(llvm::MDNode::get(C, Ops)); 3166 }; 3167 3168 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3169 TargetRegionMetadataEmitter); 3170 3171 // Create function that emits metadata for each device global variable entry; 3172 auto &&DeviceGlobalVarMetadataEmitter = 3173 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3174 MD](StringRef MangledName, 3175 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3176 &E) { 3177 // Generate metadata for global variables. Each entry of this metadata 3178 // contains: 3179 // - Entry 0 -> Kind of this type of metadata (1). 3180 // - Entry 1 -> Mangled name of the variable. 3181 // - Entry 2 -> Declare target kind. 3182 // - Entry 3 -> Order the entry was created. 3183 // The first element of the metadata node is the kind. 3184 llvm::Metadata *Ops[] = { 3185 GetMDInt(E.getKind()), GetMDString(MangledName), 3186 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3187 3188 // Save this entry in the right position of the ordered entries array. 
3189 OrderedEntries[E.getOrder()] = 3190 std::make_tuple(&E, SourceLocation(), MangledName); 3191 3192 // Add metadata to the named metadata node. 3193 MD->addOperand(llvm::MDNode::get(C, Ops)); 3194 }; 3195 3196 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3197 DeviceGlobalVarMetadataEmitter); 3198 3199 for (const auto &E : OrderedEntries) { 3200 assert(std::get<0>(E) && "All ordered entries must exist!"); 3201 if (const auto *CE = 3202 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3203 std::get<0>(E))) { 3204 if (!CE->getID() || !CE->getAddress()) { 3205 // Do not blame the entry if the parent funtion is not emitted. 3206 StringRef FnName = ParentFunctions[CE->getOrder()]; 3207 if (!CGM.GetGlobalValue(FnName)) 3208 continue; 3209 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3210 DiagnosticsEngine::Error, 3211 "Offloading entry for target region in %0 is incorrect: either the " 3212 "address or the ID is invalid."); 3213 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3214 continue; 3215 } 3216 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3217 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3218 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3219 OffloadEntryInfoDeviceGlobalVar>( 3220 std::get<0>(E))) { 3221 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3222 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3223 CE->getFlags()); 3224 switch (Flags) { 3225 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3226 if (CGM.getLangOpts().OpenMPIsDevice && 3227 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3228 continue; 3229 if (!CE->getAddress()) { 3230 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3231 DiagnosticsEngine::Error, "Offloading entry for declare target " 3232 "variable %0 is incorrect: the " 3233 "address is invalid."); 3234 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3235 
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // A 'declare target link' entry carries an address only on the host
        // side (checked by the assert); on the device there is nothing to
        // register for it.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For a global variable the entry ID is the variable address itself.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only the device-side compilation reads host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a fresh local context; only the named metadata
  // node "omp_offload.info" is consumed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode one metadata operand as an integer or a string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout depends on it
    // and mirrors the emitter in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds (and caches in KmpRoutineEntryPtrQTy/KmpRoutineEntryPtrTy)
/// the type of a task entry routine pointer.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The runtime expects this record without any padding between fields.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Describes one privatized variable of a task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression for the captured original variable, if any.
  const Expr *OriginalRef = nullptr;
  // The original variable being privatized.
  const VarDecl *Original = nullptr;
  // The generated private copy.
  const VarDecl *PrivateCopy = nullptr;
  // Per-element initializer variable (used for array initialization).
  const VarDecl *PrivateElemInit = nullptr;
  // A "local" private was constructed with the single-argument constructor:
  // everything except Original is null.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an OMPAllocateDeclAttr that names a
/// non-default allocator (or provides an explicit allocator expression), i.e.
/// its storage is obtained through the OpenMP allocator API.
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the implicit record '.kmp_privates.t' with one field per privatized
/// variable; returns nullptr when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        // Allocator-backed locals are also stored indirectly.
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Copy explicit alignment attributes to the field so the private copy
        // keeps the variable's alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the implicit 'kmp_task_t' record (with extra taskloop fields when
/// \p Kind is a taskloop directive) that mirrors the runtime's task
/// descriptor layout.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
UD->startDefinition(); 3460 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3461 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3462 UD->completeDefinition(); 3463 QualType KmpCmplrdataTy = C.getRecordType(UD); 3464 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3465 RD->startDefinition(); 3466 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3467 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3468 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3469 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3470 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3471 if (isOpenMPTaskLoopDirective(Kind)) { 3472 QualType KmpUInt64Ty = 3473 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3474 QualType KmpInt64Ty = 3475 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3476 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3477 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3478 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3479 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3480 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3481 } 3482 RD->completeDefinition(); 3483 return RD; 3484 } 3485 3486 static RecordDecl * 3487 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3488 ArrayRef<PrivateDataTy> Privates) { 3489 ASTContext &C = CGM.getContext(); 3490 // Build struct kmp_task_t_with_privates { 3491 // kmp_task_t task_data; 3492 // .kmp_privates_t. privates; 3493 // }; 3494 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3495 RD->startDefinition(); 3496 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3497 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3498 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3499 RD->completeDefinition(); 3500 return RD; 3501 } 3502 3503 /// Emit a proxy function which accepts kmp_task_t as the second 3504 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The runtime-mandated signature: (kmp_int32 gtid, kmp_task_t *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field, if present, is the second field of the record;
  // otherwise a null pointer is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb, ub, st, liter and reductions loaded
    // from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the '.omp_task_destructor.' helper that runs the destructors of all
/// destructible fields in the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destructor cleanup for every field whose type needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its (1-based) position in Args, in the
  // order: privates, firstprivates, lastprivates, private locals.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // Reference-typed and allocator-backed locals are stored as pointers, so
    // the parameter type gains an extra level of indirection accordingly.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This trivial mapping function should always be inlined when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Locate the out-parameter matching this privates field via the position
    // map built above.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    // Store the address of the privates field through the out-parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the
///        initialization source for firstprivates (may be invalid).
/// \param ForDup True when called from the task duplication function
///        (taskloops); only non-trivial constructor inits are emitted then.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor initializers
    // need to be re-run; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the element init variable to
          // point at the shared value, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops, i.e. whether any
/// private copy has a non-trivial constructor initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates are never initialized here; skip them.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Runtime signature: (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied out of the *source* task's shareds block.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
/// NOTE(review): the \p KmpTaskTWithPrivatesQTyRD parameter is currently
/// unused by this implementation.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator, filled by the constructor and
  // consumed (in reverse) by the destructor to close the loop nest.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one loop per iterator in \p E; a null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    // Privatize each iterator variable and its helper counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop back-edges and exit blocks, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the address of \p E's data and the number of bytes it occupies,
/// handling array-shaping expressions and array sections specially.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim1 * dim2 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // For an array section the size is (one-past-upper-bound - lower-bound)
    // computed on integer-converted pointers.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
4114 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4115 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4116 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4117 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4118 } else { 4119 SizeVal = CGF.getTypeSize(Ty); 4120 } 4121 return std::make_pair(Addr, SizeVal); 4122 } 4123 4124 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4125 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4126 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4127 if (KmpTaskAffinityInfoTy.isNull()) { 4128 RecordDecl *KmpAffinityInfoRD = 4129 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4130 KmpAffinityInfoRD->startDefinition(); 4131 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4132 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4133 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4134 KmpAffinityInfoRD->completeDefinition(); 4135 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4136 } 4137 } 4138 4139 CGOpenMPRuntime::TaskResultTy 4140 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4141 const OMPExecutableDirective &D, 4142 llvm::Function *TaskFunction, QualType SharedsTy, 4143 Address Shareds, const OMPTaskDataTy &Data) { 4144 ASTContext &C = CGM.getContext(); 4145 llvm::SmallVector<PrivateDataTy, 4> Privates; 4146 // Aggregate privates and sort them by the alignment. 
  // Collect private variables paired with their private copies.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry their element initializer.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps source order among equally-aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record (cached separately from the plain task record).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // TaskPrivatesMap is the 4th parameter of the task function; pass a null
  // pointer of that type when there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(expr): if the condition is only known at runtime, select the flag
  // dynamically; otherwise fold it into a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime-computed count
    // (NumOfElements); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: emit a constant-sized array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For iterator-modified clauses the write position is only known at
    // runtime, so keep it in a temporary counter starting after the
    // statically-filled entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-iteration private state also need a task
    // duplication callback.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// Only in/out/inout/mutexinoutset map to runtime flags; source, sink,
/// depobj and unknown kinds must never reach this function.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// kmp_depend_info is { intptr_t base_addr; size_t len; <bool-width> flags; }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns the number of dependences stored in a depobj and an lvalue for
/// its first kmp_depend_info element. The element count is stashed in the
/// base_addr field of the record at index -1 (one before the array start).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the hidden size record.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info entries for every expression in \p Data.DepExprs,
/// writing into \p DependenciesArray starting at \p Pos. \p Pos is either a
/// compile-time index (unsigned*) or a runtime counter lvalue (LValue*);
/// it is advanced past each written element.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the whole fill loop in the iterator loop nest, if any.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the write position (compile-time or runtime form).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Emits code computing, for each depobj expression in \p Data.DepExprs, the
/// number of kmp_depend_info elements it holds, and returns the loaded sizes
/// (one llvm::Value per expression). The counts are accumulated in stack
/// temporaries because the expressions may sit under an iterator loop nest.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the record at index -1 (see
      // getDepobjElements).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary so iterator iterations
      // sum their contributions.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated totals after the iterator scope has closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the contents of every depobj in \p Data.DepExprs into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The element count is stored one record before the array start.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

/// Emits the kmp_depend_info array for a task's depend clauses and returns
/// {number of elements (i32), array address cast to void*}, or
/// {nullptr, invalid} if every clause's expression list is empty.
/// Fill order: plain deps first (static positions), then iterator-modified
/// deps (runtime counter), then depobj contents (memcpy'd).
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of plain (non-depobj, non-iterator) dependencies.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators (both are only known at runtime).
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized total: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static total: emit a constant-sized array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Fill plain dependencies first at statically-known positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime element count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count element, then scale by the (aligned)
    // record size to get the allocation size in bytes.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static element count: size the allocation as a constant array with
    // one extra slot for the count element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position starts at 1 — slot 0 holds the count. With an iterator the
  // position must be a runtime counter; otherwise a plain unsigned works.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of element 1 (past the count) as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emit code for the 'destroy' clause of an 'omp depobj' directive: free
/// the heap-allocated kmp_depend_info array via __kmpc_free. The stored
/// pointer refers to element 1, so the allocation base is one element back.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one element to the allocation base (element 0 holds the
  // count) before handing the pointer to __kmpc_free.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emit code for the 'update' clause of an 'omp depobj' directive: loop
/// over the depobj's kmp_depend_info elements and rewrite each element's
/// flags field to the runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): there is no up-front emptiness check before entering the
  // body; presumably a depobj always has at least one element — confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit code for an 'omp task' directive. Allocates the task via
/// emitTaskInit, then either enqueues it (__kmpc_omp_task[_with_deps]) or,
/// under a false 'if' clause, waits on its dependences and executes the
/// task entry inline between __kmpc_omp_task_begin_if0 /
/// __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0); // ndeps_noalias
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part_id; reset it before enqueueing.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); // ndeps_noalias
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit code for an 'omp taskloop' directive: allocate the task, fill in
/// the lower-bound/upper-bound/stride/reductions fields of kmp_task_t, and
/// call __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task record from the loop
  // directive's bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, const Expr *,
                                  const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: privatize LHS/RHS to the current elements so RedOpGen's
  // expressions (which reference the variables) combine element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A UDR combiner is a CallExpr whose callee is an OpaqueValueExpr
  // referring to an OMPDeclareReductionDecl; bind the callee to the
  // emitted combiner function and emit the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Otherwise the combiner is an ordinary expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the outlined reduce_func(void *lhs[<n>], void *rhs[<n>]) used by
/// __kmpc_reduce: for each reduction item, combine *(Type<i>*)lhs[i] with
/// *(Type<i>*)rhs[i] via the corresponding ReductionOp. VLA items occupy an
/// extra array slot holding their size.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable onto its slot in the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner for a single reduction item: element-wise via
/// EmitOMPAggregateReduction for array-typed privates, directly otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emit the full reduction sequence for a 'reduction' clause: the outlined
/// reduce_func, the __kmpc_reduce{_nowait} call, and the switch on its
/// result (case 1: plain combine + end_reduce; case 2: atomic combine).
/// With Options.SimpleReduction only the plain per-item combiners are
/// emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
5691 RHSExpr = ACO->getCond(); 5692 } 5693 if (const auto *BORHS = 5694 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5695 EExpr = BORHS->getRHS(); 5696 BO = BORHS->getOpcode(); 5697 } 5698 } 5699 if (XExpr) { 5700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5701 auto &&AtomicRedGen = [BO, VD, 5702 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5703 const Expr *EExpr, const Expr *UpExpr) { 5704 LValue X = CGF.EmitLValue(XExpr); 5705 RValue E; 5706 if (EExpr) 5707 E = CGF.EmitAnyExpr(EExpr); 5708 CGF.EmitOMPAtomicSimpleUpdateExpr( 5709 X, E, BO, /*IsXLHSInRHSPart=*/true, 5710 llvm::AtomicOrdering::Monotonic, Loc, 5711 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5712 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5713 PrivateScope.addPrivate( 5714 VD, [&CGF, VD, XRValue, Loc]() { 5715 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5716 CGF.emitOMPSimpleStore( 5717 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5718 VD->getType().getNonReferenceType(), Loc); 5719 return LHSTemp; 5720 }); 5721 (void)PrivateScope.Privatize(); 5722 return CGF.EmitAnyExpr(UpExpr); 5723 }); 5724 }; 5725 if ((*IPriv)->getType()->isArrayType()) { 5726 // Emit atomic reduction for array section. 5727 const auto *RHSVar = 5728 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5729 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5730 AtomicRedGen, XExpr, EExpr, UpExpr); 5731 } else { 5732 // Emit atomic reduction for array subscript or single variable. 5733 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5734 } 5735 } else { 5736 // Emit as a critical region. 
5737 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5738 const Expr *, const Expr *) { 5739 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5740 std::string Name = RT.getName({"atomic_reduction"}); 5741 RT.emitCriticalRegion( 5742 CGF, Name, 5743 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5744 Action.Enter(CGF); 5745 emitReductionCombiner(CGF, E); 5746 }, 5747 Loc); 5748 }; 5749 if ((*IPriv)->getType()->isArrayType()) { 5750 const auto *LHSVar = 5751 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5752 const auto *RHSVar = 5753 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5754 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5755 CritRedGen); 5756 } else { 5757 CritRedGen(CGF, nullptr, nullptr, nullptr); 5758 } 5759 } 5760 ++ILHS; 5761 ++IRHS; 5762 ++IPriv; 5763 } 5764 }; 5765 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5766 if (!WithNowait) { 5767 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5768 llvm::Value *EndArgs[] = { 5769 IdentTLoc, // ident_t *<loc> 5770 ThreadId, // i32 <gtid> 5771 Lock // kmp_critical_name *&<lock> 5772 }; 5773 CommonActionTy Action(nullptr, llvm::None, 5774 OMPBuilder.getOrCreateRuntimeFunction( 5775 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5776 EndArgs); 5777 AtomicRCG.setAction(Action); 5778 AtomicRCG(CGF); 5779 } else { 5780 AtomicRCG(CGF); 5781 } 5782 5783 CGF.EmitBranch(DefaultBB); 5784 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5785 } 5786 5787 /// Generates unique name for artificial threadprivate variables. 5788 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5789 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5790 const Expr *Ref) { 5791 SmallString<256> Buffer; 5792 llvm::raw_svector_ostream Out(Buffer); 5793 const clang::DeclRefExpr *DE; 5794 const VarDecl *D = ::getBaseDecl(Ref, DE); 5795 if (!D) 5796 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5797 D = D->getCanonicalDecl(); 5798 std::string Name = CGM.getOpenMPRuntime().getName( 5799 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5800 Out << Prefix << Name << "_" 5801 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5802 return std::string(Out.str()); 5803 } 5804 5805 /// Emits reduction initializer function: 5806 /// \code 5807 /// void @.red_init(void* %arg, void* %orig) { 5808 /// %0 = bitcast void* %arg to <type>* 5809 /// store <type> <init>, <type>* %0 5810 /// ret void 5811 /// } 5812 /// \endcode 5813 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5814 SourceLocation Loc, 5815 ReductionCodeGen &RCG, unsigned N) { 5816 ASTContext &C = CGM.getContext(); 5817 QualType VoidPtrTy = C.VoidPtrTy; 5818 VoidPtrTy.addRestrict(); 5819 FunctionArgList Args; 5820 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5821 ImplicitParamDecl::Other); 5822 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5823 ImplicitParamDecl::Other); 5824 Args.emplace_back(&Param); 5825 Args.emplace_back(&ParamOrig); 5826 const auto &FnInfo = 5827 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5828 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5829 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5830 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5831 Name, &CGM.getModule()); 5832 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5833 Fn->setDoesNotRecurse(); 5834 CodeGenFunction CGF(CGM); 5835 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5836 Address PrivateAddr = CGF.EmitLoadOfPointer( 5837 CGF.GetAddrOfLocalVar(&Param), 5838 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5839 llvm::Value *Size = nullptr; 5840 // If the size of the reduction item is non-constant, load it from global 5841 // threadprivate variable. 5842 if (RCG.getSizes(N).second) { 5843 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5844 CGF, CGM.getContext().getSizeType(), 5845 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5846 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5847 CGM.getContext().getSizeType(), Loc); 5848 } 5849 RCG.emitAggregateType(CGF, N, Size); 5850 LValue OrigLVal; 5851 // If initializer uses initializer from declare reduction construct, emit a 5852 // pointer to the address of the original reduction item (reuired by reduction 5853 // initializer) 5854 if (RCG.usesReductionInitializer(N)) { 5855 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5856 SharedAddr = CGF.EmitLoadOfPointer( 5857 SharedAddr, 5858 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5859 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5860 } else { 5861 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5862 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5863 CGM.getContext().VoidPtrTy); 5864 } 5865 // Emit the initializer: 5866 // %0 = bitcast void* %arg to <type>* 5867 // store <type> <init>, <type>* %0 5868 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5869 [](CodeGenFunction &) { return false; }); 5870 CGF.FinishFunction(); 5871 return Fn; 5872 } 5873 5874 /// Emits reduction combiner function: 5875 /// \code 5876 /// void @.red_comb(void* %arg0, void* %arg1) { 5877 /// %lhs = bitcast void* %arg0 to <type>* 5878 /// %rhs = bitcast void* %arg1 to <type>* 5879 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5880 /// store <type> %2, <type>* %lhs 5881 /// 
ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // in/out argument holds the accumulated value, in argument the new value.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) if the item has no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits the task-reduction initialization: builds one kmp_taskred_input_t
/// descriptor per reduction item and passes the array to the runtime; returns
/// the taskgroup reduction descriptor returned by the runtime call.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // 
flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // The finalizer may be null when the item needs no cleanups.
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag 1 tells the runtime the item uses lazy (delayed) allocation.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the finalization of a task-modifier reduction region.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the size of a VLA/array-section reduction item into its artificial
/// threadprivate variable so init/comb/fini functions can retrieve it.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the task-private copy of a reduction item, obtained
/// from the runtime via __kmpc_task_reduction_get_th_data.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits code for the 'taskwait' directive.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate to the OpenMPIRBuilder when it is enabled.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits an inlined (non-outlined) OpenMP region for directives like 'for' or
/// 'sections' that do not require a separate function.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Runtime cancellation-kind values expected by __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's cancel kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits code for the 'cancellation point' directive.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits code for the 'cancel' directive, optionally guarded by an 'if'
/// clause condition.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Emit the cancel call only when the 'if' clause condition is true.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6323 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6324 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6325 6326 public: 6327 OMPUsesAllocatorsActionTy( 6328 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6329 : Allocators(Allocators) {} 6330 void Enter(CodeGenFunction &CGF) override { 6331 if (!CGF.HaveInsertPoint()) 6332 return; 6333 for (const auto &AllocatorData : Allocators) { 6334 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6335 CGF, AllocatorData.first, AllocatorData.second); 6336 } 6337 } 6338 void Exit(CodeGenFunction &CGF) override { 6339 if (!CGF.HaveInsertPoint()) 6340 return; 6341 for (const auto &AllocatorData : Allocators) { 6342 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6343 AllocatorData.first); 6344 } 6345 } 6346 }; 6347 } // namespace 6348 6349 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6350 const OMPExecutableDirective &D, StringRef ParentName, 6351 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6352 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6353 assert(!ParentName.empty() && "Invalid target region parent name!"); 6354 HasEmittedTargetRegion = true; 6355 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6356 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6357 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6358 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6359 if (!D.AllocatorTraits) 6360 continue; 6361 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6362 } 6363 } 6364 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6365 CodeGen.setAction(UsesAllocatorAction); 6366 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6367 IsOffloadEntry, CodeGen); 6368 } 6369 6370 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6371 const Expr *Allocator, 6372 const Expr *AllocatorTraits) { 6373 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6374 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6375 // Use default memspace handle. 6376 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6377 llvm::Value *NumTraits = llvm::ConstantInt::get( 6378 CGF.IntTy, cast<ConstantArrayType>( 6379 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6380 ->getSize() 6381 .getLimitedValue()); 6382 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6383 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6384 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6385 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6386 AllocatorTraitsLVal.getBaseInfo(), 6387 AllocatorTraitsLVal.getTBAAInfo()); 6388 llvm::Value *Traits = 6389 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6390 6391 llvm::Value *AllocatorVal = 6392 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6393 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6394 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6395 // Store to allocator. 
6396 CGF.EmitVarDecl(*cast<VarDecl>( 6397 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6398 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6399 AllocatorVal = 6400 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6401 Allocator->getType(), Allocator->getExprLoc()); 6402 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6403 } 6404 6405 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6406 const Expr *Allocator) { 6407 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6408 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6409 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6410 llvm::Value *AllocatorVal = 6411 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6412 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6413 CGF.getContext().VoidPtrTy, 6414 Allocator->getExprLoc()); 6415 (void)CGF.EmitRuntimeCall( 6416 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6417 OMPRTL___kmpc_destroy_allocator), 6418 {ThreadId, AllocatorVal}); 6419 } 6420 6421 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6422 const OMPExecutableDirective &D, StringRef ParentName, 6423 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6424 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6425 // Create a unique name for the entry function using the source location 6426 // information of the current target region. The name will be something like: 6427 // 6428 // __omp_offloading_DD_FFFF_PP_lBB 6429 // 6430 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6431 // mangled name of the function that encloses the target region and BB is the 6432 // line number of the target region. 

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  // Device/file/line triple uniquely identifies this target region across
  // host and device compilations; both sides must compute the same name.
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region body into a fresh function using a dedicated
  // CodeGenFunction; CGOpenMPTargetRegionInfo supplies the entry name and the
  // region codegen callback while the RAII object is in scope.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a unique dummy global serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Returns the single "effective" statement nested in \a Body, looking
/// through container statements and skipping trivial expressions, no-op
/// statements and trivial declarations; returns nullptr when more than one
/// non-trivial child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the only interesting child is itself a compound
  // statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is
        // side-effect free: type-level declarations, OpenMP metadata
        // declarations, or variables whose type and initializer are trivial.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look at the single directive nested inside (if
    // any) to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression in the captured-stmt
          // context so captured variables resolve correctly.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: emit 0 (no explicit value).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directive: the num_teams clause, if any, is on D
    // itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-execution directives and
  // must not reach this helper (see the assert above); they fall through to
  // llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads for the parallel region, if any, nested
/// directly in \a CS. \a DefaultThreadLimitVal, when non-null, acts as an
/// upper bound (an explicit num_threads value is clamped to it) and as the
/// fallback result.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Only an 'if' clause that applies to 'parallel' (unmodified or with
        // the 'parallel' name modifier) is relevant here.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations (captured temporaries)
            // before evaluating the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp: min(DefaultThreadLimitVal, NumThreads), unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive found: fall back to the default limit, or 0 when
  // there is none.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.
/// Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look inside for a nested parallel/teams/distribute
    // directive that determines the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit in the captured-stmt context.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before the limit itself.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a 'teams' (non-distribute) level to find the
      // directive that actually carries the parallelism.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: serialized parallel, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) when both are present
      // (unsigned compare), otherwise just num_threads.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-execution directives and
  // must not reach this helper (see the assert above); they fall through to
  // llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  /// Computed as the number of trailing zero bits in the mask (48 for the
  /// current 0xffff000000000000 value).
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    // Dereference yields the raw base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  /// All parallel arrays are indexed by map-entry position.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    bool IsArraySection = false;
  };

private:
  /// Information gathered for a single mappable-expression component list
  /// (map type, modifiers, and how the entry must be emitted).
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the runtime size (in bytes, as a size_t value) of the data
  /// designated by expression \a E, accounting for array shaping expressions
  /// and array sections.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * product of all shaping dimensions.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb past the end of the base: clamp the result to 0
      // instead of producing a wrapped-around size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library,  i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    // Map modifiers (always/close/present) and the 'present' motion modifier
    // translate directly into their runtime flag bits.
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
        != MapModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
        != MotionModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a
final array section. A 7345 /// final array section, is one whose length can't be proved to be one. 7346 bool isFinalArraySectionExpression(const Expr *E) const { 7347 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7348 7349 // It is not an array section and therefore not a unity-size one. 7350 if (!OASE) 7351 return false; 7352 7353 // An array section with no colon always refer to a single element. 7354 if (OASE->getColonLocFirst().isInvalid()) 7355 return false; 7356 7357 const Expr *Length = OASE->getLength(); 7358 7359 // If we don't have a length we have to check if the array has size 1 7360 // for this dimension. Also, we should always expect a length if the 7361 // base type is pointer. 7362 if (!Length) { 7363 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7364 OASE->getBase()->IgnoreParenImpCasts()) 7365 .getCanonicalType(); 7366 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7367 return ATy->getSize().getSExtValue() != 1; 7368 // If we don't have a constant dimension length, we have to consider 7369 // the current section as having any size, so it is not necessarily 7370 // unitary. If it happen to be unity size, that's user fault. 7371 return true; 7372 } 7373 7374 // Check if the length evaluates to 1. 7375 Expr::EvalResult Result; 7376 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7377 return true; // Can have more that size 1. 7378 7379 llvm::APSInt ConstLength = Result.Val.getInt(); 7380 return ConstLength.getSExtValue() != 1; 7381 } 7382 7383 /// Generate the base pointers, section pointers, sizes, map type bits, and 7384 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7385 /// map type, map or motion modifiers, and expression components. 7386 /// \a IsFirstComponent should be set to true if the provided set of 7387 /// components is the first associated with a capture. 
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript/section whose base is 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping expression whose base is 'this'.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          // Defer the dereference: it may turn out to be a member access
          // (see the FirstPointerInComplexData handling below).
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    // (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Compute the lower bound (section pointer) of the entry.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            // Continue after the overlapped element we just skipped.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing region from the last overlapped element to the
          // end of the struct.
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      } else if (FirstPointerInComplexData) {
        // Perform the deferred dereference of the base pointer now that we
        // know it is not a simple member access.
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }

    // Everything below only applies to non-contiguous (strided) mappings.
    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    // NOTE(review): ElementTypeSize is only assigned inside the loop below
    // when a CAT/VAT element type is found; it appears the non-contiguous
    // path guarantees at least one array-typed component — confirm, otherwise
    // the DimProd initialization below reads an uninitialized value.
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 btyes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension size except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case that having pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we have already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of dimension.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous. Notice that offset, count, and stride
    // are only meaningful for array-section, so we insert a null for anything
    // other than array-section.
    // Also, the size of offset, count, and stride are not the same as
    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
    // count, and stride are the same as the number of non-contiguous
    // declaration in target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        // A plain subscript contributes offset = index, count = 1, and keeps
        // the previous stride.
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a high dimension is an array section, we construct all
        // the lower dimension as array section, however, for case like
        // arr[0:2][2], Clang construct the inner dimension as an array section
        // but it actually is not in an array section form according to spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0           1         4    (int)    <- dummy dimension
      //    D1          0           2         8    (2 * (1) * 4)
      //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // A const firstprivate captured by reference only needs a copy to the
      // device that is never written back, but must always be refreshed.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // Firstprivate pointers are mapped as pointer-with-object.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not known as firstprivate here: default to copy-in/copy-out.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Compute the MEMBER_OF flag for the entry at the given (zero-based)
  /// \p Position in the base-pointers array: Position + 1 is shifted into the
  /// MEMBER_OF bit-field of the mapping flags.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with the concrete
  /// \p MemberOfFlag value, leaving entries that are not marked with the
  /// placeholder untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the record \p RD into \p Layout as the sequence of non-empty
  /// bases (recursively) and non-bitfield fields, in LLVM struct-layout
  /// order. \p AsBase selects the base-subobject LLVM type (no tail padding
  /// reuse) instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Each slot holds either a (non-virtual or virtual) base or a field,
    // indexed by its LLVM field number; unassigned slots stay null.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot already claimed (e.g. by a non-virtual base) is kept as-is.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout: recurse into bases, append fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for executable directives: pre-collects firstprivate and
  /// is_device_ptr information used while generating map entries.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses. Both the
    // allocator-traits variable and a variable used as a non-predefined
    // allocator are treated as implicit firstprivates.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// \param CombinedInfo [out] receives the single combined entry.
  /// \param CurTypes [in,out] flags of the member entries; on return they are
  ///        re-marked as MEMBER_OF the combined entry.
  /// \param PartialStruct range info (base, lowest/highest element) gathered
  ///        while processing the individual members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = false) const {
    // A single entry that is not a member and not an array section needs no
    // combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element if any.
    if (!CurTypes.empty())
      CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  /// \param SkipVarSet declarations whose component lists must be ignored.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Component lists are keyed by the canonical declaration
    // (nullptr for 'this'); SkipVarSet entries are dropped.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from map, to, and from clauses.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the entries, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = !NotTargetParams;

      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
            L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD,
                          NotTargetParams);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Fill the information map for map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      const auto *EI = MC->getVarRefs().begin();
      for (const auto L : MC->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
        const ValueDecl *VD =
            std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                           : nullptr;
        // Get the corresponding user-defined mapper.
        Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                              MC->getMapTypeModifiers(), llvm::None,
                              /*ReturnDevicePointer=*/false, MC->isImplicit(),
                              std::get<2>(L), E);
        ++EI;
      }
    }

    // Emit the entries, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
            L.ForDeviceAddr, VD, L.VarRef);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// Records one PTR_AND_OBJ entry per by-reference (or pointer) capture and
  /// one for the captured 'this', and remembers in \p LambdaPointers which
  /// lambda object each field entry belongs to.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda objects are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: shadows the outer VD parameter; refers to the captured variable.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  /// For each entry emitted by generateInfoForLambdaCaptures, find the entry
  /// of the enclosing lambda object (searching backwards through Pointers)
  /// and rewrite the MEMBER_OF placeholder to point at it.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with exactly this flag combination were produced by
      // generateInfoForLambdaCaptures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda object.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ?
nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Size of a device pointer is the size of void*.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per map-clause component list that mentions VD:
    // (components, map type, map-type modifiers, is-implicit, mapper,
    //  original clause expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Maps each list to the component lists it is a prefix of (i.e. that map
    // a sub-object of what it maps).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every later list only, so each pair is checked once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the base (reverse order) until they diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted list is the base; the longer one maps a sub-object.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped component lists by declaration order of the first
      // field on which they diverge (per the record layout in Layout).
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      // 'this' is mapped with the size of the pointed-to object.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on the directive overrides the implicitness.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate values get a registered global copy so the
        // device can reference them directly.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: map the pointee, so load through the
          // captured reference first.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the stores that describe each non-contiguous dimension
/// (offset/count/stride triples) and plug them into Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
9050 // 9051 // struct descriptor_dim { 9052 // uint64_t offset; 9053 // uint64_t count; 9054 // uint64_t stride 9055 // }; 9056 ASTContext &C = CGF.getContext(); 9057 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9058 RecordDecl *RD; 9059 RD = C.buildImplicitRecord("descriptor_dim"); 9060 RD->startDefinition(); 9061 addFieldToRecordDecl(C, RD, Int64Ty); 9062 addFieldToRecordDecl(C, RD, Int64Ty); 9063 addFieldToRecordDecl(C, RD, Int64Ty); 9064 RD->completeDefinition(); 9065 QualType DimTy = C.getRecordType(RD); 9066 9067 enum { OffsetFD = 0, CountFD, StrideFD }; 9068 // We need two index variable here since the size of "Dims" is the same as the 9069 // size of Components, however, the size of offset, count, and stride is equal 9070 // to the size of base declaration that is non-contiguous. 9071 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9072 // Skip emitting ir if dimension size is 1 since it cannot be 9073 // non-contiguous. 9074 if (NonContigInfo.Dims[I] == 1) 9075 continue; 9076 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9077 QualType ArrayTy = 9078 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9079 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9080 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9081 unsigned RevIdx = EE - II - 1; 9082 LValue DimsLVal = CGF.MakeAddrLValue( 9083 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9084 // Offset 9085 LValue OffsetLVal = CGF.EmitLValueForField( 9086 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9087 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9088 // Count 9089 LValue CountLVal = CGF.EmitLValueForField( 9090 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9091 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9092 // Stride 9093 LValue StrideLVal = CGF.EmitLValueForField( 9094 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9095 
CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9096 } 9097 // args[I] = &dims 9098 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9099 DimsAddr, CGM.Int8PtrTy); 9100 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9101 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9102 Info.PointersArray, 0, I); 9103 Address PAddr(P, CGF.getPointerAlign()); 9104 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9105 ++L; 9106 } 9107 } 9108 9109 /// Emit a string constant containing the names of the values mapped to the 9110 /// offloading runtime library. 9111 llvm::Constant * 9112 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9113 MappableExprsHandler::MappingExprInfo &MapExprs) { 9114 llvm::Constant *SrcLocStr; 9115 if (!MapExprs.getMapDecl()) { 9116 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9117 } else { 9118 std::string ExprName = ""; 9119 if (MapExprs.getMapExpr()) { 9120 PrintingPolicy P(CGF.getContext().getLangOpts()); 9121 llvm::raw_string_ostream OS(ExprName); 9122 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9123 OS.flush(); 9124 } else { 9125 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9126 } 9127 9128 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9129 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9130 const char *FileName = PLoc.getFilename(); 9131 unsigned Line = PLoc.getLine(); 9132 unsigned Column = PLoc.getColumn(); 9133 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9134 Line, Column); 9135 } 9136 9137 return SrcLocStr; 9138 } 9139 9140 /// Emit the arrays used to pass the captures and map information to the 9141 /// offloading runtime library. If there is no map or capture information, 9142 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for base pointers, pointers and mappers; the
    // element-wise stores are emitted in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-capture slots of the runtime arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device copy of a use_device_ptr decl lives.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the region-end call use the PRESENT-stripped map types if one was
    // generated; otherwise the begin map types are reused.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No captures/maps at all: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look at the single statement nested inside D's innermost captured region.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target teams' split across two directives: look one level deeper
        // for the distribute directive inside the teams region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Any other directive kind is not a valid enclosing directive here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
/// for (unsigned i = 0; i < size; i++) {
///   // For each component specified by this mapper:
///   for (auto c : all_components) {
///     if (c.hasMapper())
///       (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                     c.arg_type);
///     else
///       __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                   c.arg_begin, c.arg_size, c.arg_type);
///   }
/// }
/// // Delete the array section.
/// if (size > 1 && maptype.IsDelete)
///   __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                               size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper is emitted at most once; later requests reuse the cached one.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature is
  // (void *handle, void *base, void *begin, int64_t size, int64_t type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through all the array elements and map each of
  // them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. The PHI below gets its back-edge incoming value
  // added after the body is emitted (see PtrNext below).
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the position of the MEMBER_OF bit-field so it can be
  // added to each component's map-type flags below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The ToElseBB edge carries the unmodified type (the tofrom case).
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also track it per-enclosing-function when a
  // CodeGenFunction was provided.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
/// If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section (more than one element).
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. For the init case the
  // body is entered when the delete bit is NOT set; for the delete case it is
  // entered when the delete bit IS set.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

// Return the mapper function for \p D, emitting it on first use.
// emitUserDefinedMapper registers the result in UDMMap.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // Emit the trip count and pass it to the runtime via
  // __kmpc_push_target_tripcount, inside an inlined region.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' or 'nowait' clauses require the target call to be wrapped in an
  // outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray/MapNamesArray are filled by TargetThenGen below
  // before ThenGen runs; ThenGen captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured-record fields, and the already-generated
    // capture values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // 'this' captures are recorded as a null key in MappedVarSet.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to the captures of ThenGen (see above).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives, so
    // reaching them here is a logic error.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10332 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10333 return; 10334 10335 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10336 return; 10337 } 10338 10339 // If this is a lambda function, look into its body. 10340 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10341 S = L->getBody(); 10342 10343 // Keep looking for target regions recursively. 10344 for (const Stmt *II : S->children()) 10345 scanForTargetRegionsFunctions(II, ParentName); 10346 } 10347 10348 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10349 // If emitting code for the host, we do not process FD here. Instead we do 10350 // the normal code generation. 10351 if (!CGM.getLangOpts().OpenMPIsDevice) { 10352 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10353 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10354 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10355 // Do not emit device_type(nohost) functions for the host. 10356 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10357 return true; 10358 } 10359 return false; 10360 } 10361 10362 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10363 // Try to detect target regions in the function. 10364 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10365 StringRef Name = CGM.getMangledName(GD); 10366 scanForTargetRegionsFunctions(FD->getBody(), Name); 10367 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10368 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10369 // Do not emit device_type(nohost) functions for the host. 10370 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10371 return true; 10372 } 10373 10374 // Do not to emit function if it is not marked as declare target. 
10375 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10376 AlreadyEmittedTargetDecls.count(VD) == 0; 10377 } 10378 10379 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10380 if (!CGM.getLangOpts().OpenMPIsDevice) 10381 return false; 10382 10383 // Check if there are Ctors/Dtors in this declaration and look for target 10384 // regions in it. We use the complete variant to produce the kernel name 10385 // mangling. 10386 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10387 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10388 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10389 StringRef ParentName = 10390 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10391 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10392 } 10393 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10394 StringRef ParentName = 10395 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10396 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10397 } 10398 } 10399 10400 // Do not to emit variable if it is not marked as declare target. 
10401 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10402 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10403 cast<VarDecl>(GD.getDecl())); 10404 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10405 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10406 HasRequiresUnifiedSharedMemory)) { 10407 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10408 return true; 10409 } 10410 return false; 10411 } 10412 10413 llvm::Constant * 10414 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10415 const VarDecl *VD) { 10416 assert(VD->getType().isConstant(CGM.getContext()) && 10417 "Expected constant variable."); 10418 StringRef VarName; 10419 llvm::Constant *Addr; 10420 llvm::GlobalValue::LinkageTypes Linkage; 10421 QualType Ty = VD->getType(); 10422 SmallString<128> Buffer; 10423 { 10424 unsigned DeviceID; 10425 unsigned FileID; 10426 unsigned Line; 10427 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10428 FileID, Line); 10429 llvm::raw_svector_ostream OS(Buffer); 10430 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10431 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10432 VarName = OS.str(); 10433 } 10434 Linkage = llvm::GlobalValue::InternalLinkage; 10435 Addr = 10436 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10437 getDefaultFirstprivateAddressSpace()); 10438 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10439 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10440 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10441 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10442 VarName, Addr, VarSize, 10443 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10444 return Addr; 10445 } 10446 10447 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10448 llvm::Constant *Addr) { 10449 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10450 
!CGM.getLangOpts().OpenMPIsDevice) 10451 return; 10452 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10453 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10454 if (!Res) { 10455 if (CGM.getLangOpts().OpenMPIsDevice) { 10456 // Register non-target variables being emitted in device code (debug info 10457 // may cause this). 10458 StringRef VarName = CGM.getMangledName(VD); 10459 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10460 } 10461 return; 10462 } 10463 // Register declare target variables. 10464 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10465 StringRef VarName; 10466 CharUnits VarSize; 10467 llvm::GlobalValue::LinkageTypes Linkage; 10468 10469 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10470 !HasRequiresUnifiedSharedMemory) { 10471 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10472 VarName = CGM.getMangledName(VD); 10473 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10474 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10475 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10476 } else { 10477 VarSize = CharUnits::Zero(); 10478 } 10479 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10480 // Temp solution to prevent optimizations of the internal variables. 
10481 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10482 std::string RefName = getName({VarName, "ref"}); 10483 if (!CGM.GetGlobalValue(RefName)) { 10484 llvm::Constant *AddrRef = 10485 getOrCreateInternalVariable(Addr->getType(), RefName); 10486 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10487 GVAddrRef->setConstant(/*Val=*/true); 10488 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10489 GVAddrRef->setInitializer(Addr); 10490 CGM.addCompilerUsedGlobal(GVAddrRef); 10491 } 10492 } 10493 } else { 10494 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10495 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10496 HasRequiresUnifiedSharedMemory)) && 10497 "Declare target attribute must link or to with unified memory."); 10498 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10499 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10500 else 10501 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10502 10503 if (CGM.getLangOpts().OpenMPIsDevice) { 10504 VarName = Addr->getName(); 10505 Addr = nullptr; 10506 } else { 10507 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10508 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10509 } 10510 VarSize = CGM.getPointerSize(); 10511 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10512 } 10513 10514 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10515 VarName, Addr, VarSize, Flags, Linkage); 10516 } 10517 10518 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10519 if (isa<FunctionDecl>(GD.getDecl()) || 10520 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10521 return emitTargetFunctions(GD); 10522 10523 return emitTargetGlobalVariable(GD); 10524 } 10525 10526 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10527 for (const VarDecl *VD : DeferredGlobalVariables) { 10528 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10529 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10530 if (!Res) 
10531 continue; 10532 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10533 !HasRequiresUnifiedSharedMemory) { 10534 CGM.EmitGlobal(VD); 10535 } else { 10536 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10537 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10538 HasRequiresUnifiedSharedMemory)) && 10539 "Expected link clause or to clause with unified memory."); 10540 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10541 } 10542 } 10543 } 10544 10545 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10546 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10547 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10548 " Expected target-based directive."); 10549 } 10550 10551 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10552 for (const OMPClause *Clause : D->clauselists()) { 10553 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10554 HasRequiresUnifiedSharedMemory = true; 10555 } else if (const auto *AC = 10556 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10557 switch (AC->getAtomicDefaultMemOrderKind()) { 10558 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10559 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10560 break; 10561 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10562 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10563 break; 10564 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10565 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10566 break; 10567 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10568 break; 10569 } 10570 } 10571 } 10572 } 10573 10574 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10575 return RequiresAtomicOrdering; 10576 } 10577 10578 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10579 LangAS &AS) { 10580 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10581 return false; 10582 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10583 
switch(A->getAllocatorType()) { 10584 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10585 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10586 // Not supported, fallback to the default mem space. 10587 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10588 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10589 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10590 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10591 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10592 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10593 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10594 AS = LangAS::Default; 10595 return true; 10596 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10597 llvm_unreachable("Expected predefined allocator for the variables with the " 10598 "static storage."); 10599 } 10600 return false; 10601 } 10602 10603 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10604 return HasRequiresUnifiedSharedMemory; 10605 } 10606 10607 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10608 CodeGenModule &CGM) 10609 : CGM(CGM) { 10610 if (CGM.getLangOpts().OpenMPIsDevice) { 10611 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10612 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10613 } 10614 } 10615 10616 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10617 if (CGM.getLangOpts().OpenMPIsDevice) 10618 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10619 } 10620 10621 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10622 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10623 return true; 10624 10625 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10626 // Do not to emit function if it is marked as declare target as it was already 10627 // emitted. 
10628 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10629 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10630 if (auto *F = dyn_cast_or_null<llvm::Function>( 10631 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10632 return !F->isDeclaration(); 10633 return false; 10634 } 10635 return true; 10636 } 10637 10638 return !AlreadyEmittedTargetDecls.insert(D).second; 10639 } 10640 10641 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10642 // If we don't have entries or if we are emitting code for the device, we 10643 // don't need to do anything. 10644 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10645 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10646 (OffloadEntriesInfoManager.empty() && 10647 !HasEmittedDeclareTargetRegion && 10648 !HasEmittedTargetRegion)) 10649 return nullptr; 10650 10651 // Create and register the function that handles the requires directives. 10652 ASTContext &C = CGM.getContext(); 10653 10654 llvm::Function *RequiresRegFn; 10655 { 10656 CodeGenFunction CGF(CGM); 10657 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10658 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10659 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10660 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10661 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10662 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10663 // TODO: check for other requires clauses. 10664 // The requires directive takes effect only when a target region is 10665 // present in the compilation unit. Otherwise it is ignored and not 10666 // passed to the runtime. This avoids the runtime from throwing an error 10667 // for mismatching requires clauses across compilation units that don't 10668 // contain at least 1 target region. 
10669 assert((HasEmittedTargetRegion || 10670 HasEmittedDeclareTargetRegion || 10671 !OffloadEntriesInfoManager.empty()) && 10672 "Target or declare target region expected."); 10673 if (HasRequiresUnifiedSharedMemory) 10674 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10675 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10676 CGM.getModule(), OMPRTL___tgt_register_requires), 10677 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10678 CGF.FinishFunction(); 10679 } 10680 return RequiresRegFn; 10681 } 10682 10683 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10684 const OMPExecutableDirective &D, 10685 SourceLocation Loc, 10686 llvm::Function *OutlinedFn, 10687 ArrayRef<llvm::Value *> CapturedVars) { 10688 if (!CGF.HaveInsertPoint()) 10689 return; 10690 10691 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10692 CodeGenFunction::RunCleanupsScope Scope(CGF); 10693 10694 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10695 llvm::Value *Args[] = { 10696 RTLoc, 10697 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10698 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10699 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10700 RealArgs.append(std::begin(Args), std::end(Args)); 10701 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10702 10703 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10704 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10705 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10706 } 10707 10708 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10709 const Expr *NumTeams, 10710 const Expr *ThreadLimit, 10711 SourceLocation Loc) { 10712 if (!CGF.HaveInsertPoint()) 10713 return; 10714 10715 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10716 10717 llvm::Value *NumTeamsVal = 10718 NumTeams 10719 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10720 CGF.CGM.Int32Ty, /* isSigned = */ true) 10721 : CGF.Builder.getInt32(0); 10722 10723 llvm::Value *ThreadLimitVal = 10724 ThreadLimit 10725 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10726 CGF.CGM.Int32Ty, /* isSigned = */ true) 10727 : CGF.Builder.getInt32(0); 10728 10729 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10730 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10731 ThreadLimitVal}; 10732 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10733 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10734 PushNumTeamsArgs); 10735 } 10736 10737 void CGOpenMPRuntime::emitTargetDataCalls( 10738 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10739 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10740 if (!CGF.HaveInsertPoint()) 10741 return; 10742 10743 // Action used to replace the default codegen action and turn privatization 10744 // off. 10745 PrePostActionTy NoPrivAction; 10746 10747 // Generate the code for the opening of the data environment. Capture all the 10748 // arguments of the runtime call by reference because they are used in the 10749 // closing of the region. 10750 auto &&BeginThenGen = [this, &D, Device, &Info, 10751 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10752 // Fill up the arrays with all the mapped variables. 10753 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10754 10755 // Get map clause information. 10756 MappableExprsHandler MEHandler(D, CGF); 10757 MEHandler.generateAllInfo(CombinedInfo); 10758 10759 // Fill up the arrays and create the arguments. 
10760 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10761 /*IsNonContiguous=*/true); 10762 10763 llvm::Value *BasePointersArrayArg = nullptr; 10764 llvm::Value *PointersArrayArg = nullptr; 10765 llvm::Value *SizesArrayArg = nullptr; 10766 llvm::Value *MapTypesArrayArg = nullptr; 10767 llvm::Value *MapNamesArrayArg = nullptr; 10768 llvm::Value *MappersArrayArg = nullptr; 10769 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10770 SizesArrayArg, MapTypesArrayArg, 10771 MapNamesArrayArg, MappersArrayArg, Info); 10772 10773 // Emit device ID if any. 10774 llvm::Value *DeviceID = nullptr; 10775 if (Device) { 10776 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10777 CGF.Int64Ty, /*isSigned=*/true); 10778 } else { 10779 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10780 } 10781 10782 // Emit the number of elements in the offloading arrays. 10783 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10784 // 10785 // Source location for the ident struct 10786 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10787 10788 llvm::Value *OffloadingArgs[] = {RTLoc, 10789 DeviceID, 10790 PointerNum, 10791 BasePointersArrayArg, 10792 PointersArrayArg, 10793 SizesArrayArg, 10794 MapTypesArrayArg, 10795 MapNamesArrayArg, 10796 MappersArrayArg}; 10797 CGF.EmitRuntimeCall( 10798 OMPBuilder.getOrCreateRuntimeFunction( 10799 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10800 OffloadingArgs); 10801 10802 // If device pointer privatization is required, emit the body of the region 10803 // here. It will have to be duplicated: with and without privatization. 10804 if (!Info.CaptureDeviceAddrMap.empty()) 10805 CodeGen(CGF); 10806 }; 10807 10808 // Generate code for the closing of the data region. 
10809 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10810 PrePostActionTy &) { 10811 assert(Info.isValid() && "Invalid data environment closing arguments."); 10812 10813 llvm::Value *BasePointersArrayArg = nullptr; 10814 llvm::Value *PointersArrayArg = nullptr; 10815 llvm::Value *SizesArrayArg = nullptr; 10816 llvm::Value *MapTypesArrayArg = nullptr; 10817 llvm::Value *MapNamesArrayArg = nullptr; 10818 llvm::Value *MappersArrayArg = nullptr; 10819 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10820 SizesArrayArg, MapTypesArrayArg, 10821 MapNamesArrayArg, MappersArrayArg, Info, 10822 {/*ForEndCall=*/true}); 10823 10824 // Emit device ID if any. 10825 llvm::Value *DeviceID = nullptr; 10826 if (Device) { 10827 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10828 CGF.Int64Ty, /*isSigned=*/true); 10829 } else { 10830 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10831 } 10832 10833 // Emit the number of elements in the offloading arrays. 10834 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10835 10836 // Source location for the ident struct 10837 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10838 10839 llvm::Value *OffloadingArgs[] = {RTLoc, 10840 DeviceID, 10841 PointerNum, 10842 BasePointersArrayArg, 10843 PointersArrayArg, 10844 SizesArrayArg, 10845 MapTypesArrayArg, 10846 MapNamesArrayArg, 10847 MappersArrayArg}; 10848 CGF.EmitRuntimeCall( 10849 OMPBuilder.getOrCreateRuntimeFunction( 10850 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10851 OffloadingArgs); 10852 }; 10853 10854 // If we need device pointer privatization, we need to emit the body of the 10855 // region with no privatization in the 'else' branch of the conditional. 10856 // Otherwise, we don't have to do anything. 
10857 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10858 PrePostActionTy &) { 10859 if (!Info.CaptureDeviceAddrMap.empty()) { 10860 CodeGen.setAction(NoPrivAction); 10861 CodeGen(CGF); 10862 } 10863 }; 10864 10865 // We don't have to do anything to close the region if the if clause evaluates 10866 // to false. 10867 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10868 10869 if (IfCond) { 10870 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10871 } else { 10872 RegionCodeGenTy RCG(BeginThenGen); 10873 RCG(CGF); 10874 } 10875 10876 // If we don't require privatization of device pointers, we emit the body in 10877 // between the runtime calls. This avoids duplicating the body code. 10878 if (Info.CaptureDeviceAddrMap.empty()) { 10879 CodeGen.setAction(NoPrivAction); 10880 CodeGen(CGF); 10881 } 10882 10883 if (IfCond) { 10884 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10885 } else { 10886 RegionCodeGenTy RCG(EndThenGen); 10887 RCG(CGF); 10888 } 10889 } 10890 10891 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10892 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10893 const Expr *Device) { 10894 if (!CGF.HaveInsertPoint()) 10895 return; 10896 10897 assert((isa<OMPTargetEnterDataDirective>(D) || 10898 isa<OMPTargetExitDataDirective>(D) || 10899 isa<OMPTargetUpdateDirective>(D)) && 10900 "Expecting either target enter, exit data, or update directives."); 10901 10902 CodeGenFunction::OMPTargetDataInfo InputInfo; 10903 llvm::Value *MapTypesArray = nullptr; 10904 llvm::Value *MapNamesArray = nullptr; 10905 // Generate the code for the opening of the data environment. 10906 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10907 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10908 // Emit device ID if any. 
10909 llvm::Value *DeviceID = nullptr; 10910 if (Device) { 10911 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10912 CGF.Int64Ty, /*isSigned=*/true); 10913 } else { 10914 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10915 } 10916 10917 // Emit the number of elements in the offloading arrays. 10918 llvm::Constant *PointerNum = 10919 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10920 10921 // Source location for the ident struct 10922 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10923 10924 llvm::Value *OffloadingArgs[] = {RTLoc, 10925 DeviceID, 10926 PointerNum, 10927 InputInfo.BasePointersArray.getPointer(), 10928 InputInfo.PointersArray.getPointer(), 10929 InputInfo.SizesArray.getPointer(), 10930 MapTypesArray, 10931 MapNamesArray, 10932 InputInfo.MappersArray.getPointer()}; 10933 10934 // Select the right runtime function call for each standalone 10935 // directive. 10936 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10937 RuntimeFunction RTLFn; 10938 switch (D.getDirectiveKind()) { 10939 case OMPD_target_enter_data: 10940 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10941 : OMPRTL___tgt_target_data_begin_mapper; 10942 break; 10943 case OMPD_target_exit_data: 10944 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10945 : OMPRTL___tgt_target_data_end_mapper; 10946 break; 10947 case OMPD_target_update: 10948 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10949 : OMPRTL___tgt_target_data_update_mapper; 10950 break; 10951 case OMPD_parallel: 10952 case OMPD_for: 10953 case OMPD_parallel_for: 10954 case OMPD_parallel_master: 10955 case OMPD_parallel_sections: 10956 case OMPD_for_simd: 10957 case OMPD_parallel_for_simd: 10958 case OMPD_cancel: 10959 case OMPD_cancellation_point: 10960 case OMPD_ordered: 10961 case OMPD_threadprivate: 10962 case OMPD_allocate: 10963 case OMPD_task: 10964 case OMPD_simd: 10965 case OMPD_sections: 10966 case OMPD_section: 10967 case OMPD_single: 10968 case OMPD_master: 10969 case OMPD_critical: 10970 case OMPD_taskyield: 10971 case OMPD_barrier: 10972 case OMPD_taskwait: 10973 case OMPD_taskgroup: 10974 case OMPD_atomic: 10975 case OMPD_flush: 10976 case OMPD_depobj: 10977 case OMPD_scan: 10978 case OMPD_teams: 10979 case OMPD_target_data: 10980 case OMPD_distribute: 10981 case OMPD_distribute_simd: 10982 case OMPD_distribute_parallel_for: 10983 case OMPD_distribute_parallel_for_simd: 10984 case OMPD_teams_distribute: 10985 case OMPD_teams_distribute_simd: 10986 case OMPD_teams_distribute_parallel_for: 10987 case OMPD_teams_distribute_parallel_for_simd: 10988 case OMPD_declare_simd: 10989 case OMPD_declare_variant: 10990 case OMPD_begin_declare_variant: 10991 case OMPD_end_declare_variant: 10992 case OMPD_declare_target: 10993 case OMPD_end_declare_target: 10994 case OMPD_declare_reduction: 10995 case OMPD_declare_mapper: 10996 case OMPD_taskloop: 10997 case OMPD_taskloop_simd: 10998 case OMPD_master_taskloop: 10999 case OMPD_master_taskloop_simd: 11000 case OMPD_parallel_master_taskloop: 11001 case OMPD_parallel_master_taskloop_simd: 11002 case OMPD_target: 11003 case OMPD_target_simd: 11004 case OMPD_target_teams_distribute: 11005 case OMPD_target_teams_distribute_simd: 11006 case OMPD_target_teams_distribute_parallel_for: 11007 case OMPD_target_teams_distribute_parallel_for_simd: 11008 case OMPD_target_teams: 11009 case 
OMPD_target_parallel: 11010 case OMPD_target_parallel_for: 11011 case OMPD_target_parallel_for_simd: 11012 case OMPD_requires: 11013 case OMPD_unknown: 11014 default: 11015 llvm_unreachable("Unexpected standalone target data directive."); 11016 break; 11017 } 11018 CGF.EmitRuntimeCall( 11019 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11020 OffloadingArgs); 11021 }; 11022 11023 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11024 &MapNamesArray](CodeGenFunction &CGF, 11025 PrePostActionTy &) { 11026 // Fill up the arrays with all the mapped variables. 11027 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11028 11029 // Get map clause information. 11030 MappableExprsHandler MEHandler(D, CGF); 11031 MEHandler.generateAllInfo(CombinedInfo); 11032 11033 TargetDataInfo Info; 11034 // Fill up the arrays and create the arguments. 11035 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11036 /*IsNonContiguous=*/true); 11037 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11038 D.hasClausesOfKind<OMPNowaitClause>(); 11039 emitOffloadingArraysArgument( 11040 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11041 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11042 {/*ForEndTask=*/false}); 11043 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11044 InputInfo.BasePointersArray = 11045 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11046 InputInfo.PointersArray = 11047 Address(Info.PointersArray, CGM.getPointerAlign()); 11048 InputInfo.SizesArray = 11049 Address(Info.SizesArray, CGM.getPointerAlign()); 11050 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11051 MapTypesArray = Info.MapTypesArray; 11052 MapNamesArray = Info.MapNamesArray; 11053 if (RequiresOuterTask) 11054 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11055 else 11056 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11057 }; 11058 11059 if 
(IfCond) { 11060 emitIfClause(CGF, IfCond, TargetThenGen, 11061 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11062 } else { 11063 RegionCodeGenTy ThenRCG(TargetThenGen); 11064 ThenRCG(CGF); 11065 } 11066 } 11067 11068 namespace { 11069 /// Kind of parameter in a function with 'declare simd' directive. 11070 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11071 /// Attribute set of the parameter. 11072 struct ParamAttrTy { 11073 ParamKindTy Kind = Vector; 11074 llvm::APSInt StrideOrArg; 11075 llvm::APSInt Alignment; 11076 }; 11077 } // namespace 11078 11079 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11080 ArrayRef<ParamAttrTy> ParamAttrs) { 11081 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11082 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11083 // of that clause. The VLEN value must be power of 2. 11084 // In other case the notion of the function`s "characteristic data type" (CDT) 11085 // is used to compute the vector length. 11086 // CDT is defined in the following order: 11087 // a) For non-void function, the CDT is the return type. 11088 // b) If the function has any non-uniform, non-linear parameters, then the 11089 // CDT is the type of the first such parameter. 11090 // c) If the CDT determined by a) or b) above is struct, union, or class 11091 // type which is pass-by-value (except for the type that maps to the 11092 // built-in complex data type), the characteristic data type is int. 11093 // d) If none of the above three cases is applicable, the CDT is int. 11094 // The VLEN is then determined based on the CDT and the size of vector 11095 // register of that ISA for which current vector version is generated. The 11096 // VLEN is computed using the formula below: 11097 // VLEN = sizeof(vector_register) / sizeof(CDT), 11098 // where vector register size specified in section 3.2.1 Registers and the 11099 // Stack Frame of original AMD64 ABI document. 
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  // Prefer a non-void return type as the characteristic data type (CDT).
  // NOTE(review): RetType.isNull() was already handled by the early return
  // above, so the first half of this condition is always true here.
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For member functions, slot 0 of ParamAttrs describes the implicit
    // 'this' parameter; a vector 'this' makes the CDT a pointer to the class.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    // Otherwise use the type of the first parameter marked 'vector'.
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Fall back to 'int' when nothing was found, and also for aggregate CDTs.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emit the x86 'declare simd' mangled names for \p Fn: one function
/// attribute of the form "_ZGV<isa><mask><vlen><parameters>_<name>" per
/// (mask, ISA) combination.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  // ISA mangling letter and vector register width in bits for each x86
  // vector extension.
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // Which mask variants to emit: 'N' (unmasked), 'M' (masked), or both when
  // no [not]inbranch clause was given.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No explicit simdlen: derive the vector length from the register
        // width and the characteristic data type size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Mangle each parameter's OpenMP classification.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11216 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11217 QT = QT.getCanonicalType(); 11218 11219 if (QT->isVoidType()) 11220 return false; 11221 11222 if (Kind == ParamKindTy::Uniform) 11223 return false; 11224 11225 if (Kind == ParamKindTy::Linear) 11226 return false; 11227 11228 // TODO: Handle linear references with modifiers 11229 11230 if (Kind == ParamKindTy::LinearWithVarStride) 11231 return false; 11232 11233 return true; 11234 } 11235 11236 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11237 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11238 QT = QT.getCanonicalType(); 11239 unsigned Size = C.getTypeSize(QT); 11240 11241 // Only scalars and complex within 16 bytes wide set PVB to true. 11242 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11243 return false; 11244 11245 if (QT->isFloatingType()) 11246 return true; 11247 11248 if (QT->isIntegerType()) 11249 return true; 11250 11251 if (QT->isPointerType()) 11252 return true; 11253 11254 // TODO: Add support for complex types (section 3.1.2, item 2). 11255 11256 return false; 11257 } 11258 11259 /// Computes the lane size (LS) of a return type or of an input parameter, 11260 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11261 /// TODO: Add support for references, section 3.2.1, item 1. 11262 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11263 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11264 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11265 if (getAArch64PBV(PTy, C)) 11266 return C.getTypeSize(PTy); 11267 } 11268 if (getAArch64PBV(QT, C)) 11269 return C.getTypeSize(QT); 11270 11271 return C.getTypeSize(C.getUIntPtrType()); 11272 } 11273 11274 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11275 // signature of the scalar function, as defined in 3.2.2 of the 11276 // AAVFABI. 
11277 static std::tuple<unsigned, unsigned, bool> 11278 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11279 QualType RetType = FD->getReturnType().getCanonicalType(); 11280 11281 ASTContext &C = FD->getASTContext(); 11282 11283 bool OutputBecomesInput = false; 11284 11285 llvm::SmallVector<unsigned, 8> Sizes; 11286 if (!RetType->isVoidType()) { 11287 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11288 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11289 OutputBecomesInput = true; 11290 } 11291 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11292 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11293 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11294 } 11295 11296 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11297 // The LS of a function parameter / return value can only be a power 11298 // of 2, starting from 8 bits, up to 128. 11299 assert(std::all_of(Sizes.begin(), Sizes.end(), 11300 [](unsigned Size) { 11301 return Size == 8 || Size == 16 || Size == 32 || 11302 Size == 64 || Size == 128; 11303 }) && 11304 "Invalid size"); 11305 11306 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11307 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11308 OutputBecomesInput); 11309 } 11310 11311 /// Mangle the parameter part of the vector function name according to 11312 /// their OpenMP classification. The mangling function is defined in 11313 /// section 3.5 of the AAVFABI. 11314 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11315 SmallString<256> Buffer; 11316 llvm::raw_svector_ostream Out(Buffer); 11317 for (const auto &ParamAttr : ParamAttrs) { 11318 switch (ParamAttr.Kind) { 11319 case LinearWithVarStride: 11320 Out << "ls" << ParamAttr.StrideOrArg; 11321 break; 11322 case Linear: 11323 Out << 'l'; 11324 // Don't print the step value if it is not present or if it is 11325 // equal to 1. 
11326 if (ParamAttr.StrideOrArg != 1) 11327 Out << ParamAttr.StrideOrArg; 11328 break; 11329 case Uniform: 11330 Out << 'u'; 11331 break; 11332 case Vector: 11333 Out << 'v'; 11334 break; 11335 } 11336 11337 if (!!ParamAttr.Alignment) 11338 Out << 'a' << ParamAttr.Alignment; 11339 } 11340 11341 return std::string(Out.str()); 11342 } 11343 11344 // Function used to add the attribute. The parameter `VLEN` is 11345 // templated to allow the use of "x" when targeting scalable functions 11346 // for SVE. 11347 template <typename T> 11348 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11349 char ISA, StringRef ParSeq, 11350 StringRef MangledName, bool OutputBecomesInput, 11351 llvm::Function *Fn) { 11352 SmallString<256> Buffer; 11353 llvm::raw_svector_ostream Out(Buffer); 11354 Out << Prefix << ISA << LMask << VLEN; 11355 if (OutputBecomesInput) 11356 Out << "v"; 11357 Out << ParSeq << "_" << MangledName; 11358 Fn->addFnAttr(Out.str()); 11359 } 11360 11361 // Helper function to generate the Advanced SIMD names depending on 11362 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // For each NDS, emit the vector lengths that fit a 64-bit and a 128-bit
  // Advanced SIMD register (section 3.3.1 of the AAVFABI).
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Emit the 'declare simd' vector-variant attributes for \p Fn, walking all
/// redeclarations of \p FD and all OMPDeclareSimdDeclAttr attributes on each.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For methods, position 0 is the implicit 'this' parameter, keyed by the
  // function declaration itself.
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
11536 auto NI = Attr->alignments_begin(); 11537 for (const Expr *E : Attr->aligneds()) { 11538 E = E->IgnoreParenImpCasts(); 11539 unsigned Pos; 11540 QualType ParmTy; 11541 if (isa<CXXThisExpr>(E)) { 11542 Pos = ParamPositions[FD]; 11543 ParmTy = E->getType(); 11544 } else { 11545 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11546 ->getCanonicalDecl(); 11547 Pos = ParamPositions[PVD]; 11548 ParmTy = PVD->getType(); 11549 } 11550 ParamAttrs[Pos].Alignment = 11551 (*NI) 11552 ? (*NI)->EvaluateKnownConstInt(C) 11553 : llvm::APSInt::getUnsigned( 11554 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11555 .getQuantity()); 11556 ++NI; 11557 } 11558 // Mark linear parameters. 11559 auto SI = Attr->steps_begin(); 11560 auto MI = Attr->modifiers_begin(); 11561 for (const Expr *E : Attr->linears()) { 11562 E = E->IgnoreParenImpCasts(); 11563 unsigned Pos; 11564 // Rescaling factor needed to compute the linear parameter 11565 // value in the mangled name. 11566 unsigned PtrRescalingFactor = 1; 11567 if (isa<CXXThisExpr>(E)) { 11568 Pos = ParamPositions[FD]; 11569 } else { 11570 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11571 ->getCanonicalDecl(); 11572 Pos = ParamPositions[PVD]; 11573 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11574 PtrRescalingFactor = CGM.getContext() 11575 .getTypeSizeInChars(P->getPointeeType()) 11576 .getQuantity(); 11577 } 11578 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11579 ParamAttr.Kind = Linear; 11580 // Assuming a stride of 1, for `linear` without modifiers. 
11581 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11582 if (*SI) { 11583 Expr::EvalResult Result; 11584 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11585 if (const auto *DRE = 11586 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11587 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11588 ParamAttr.Kind = LinearWithVarStride; 11589 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11590 ParamPositions[StridePVD->getCanonicalDecl()]); 11591 } 11592 } 11593 } else { 11594 ParamAttr.StrideOrArg = Result.Val.getInt(); 11595 } 11596 } 11597 // If we are using a linear clause on a pointer, we need to 11598 // rescale the value of linear_step with the byte size of the 11599 // pointee type. 11600 if (Linear == ParamAttr.Kind) 11601 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11602 ++SI; 11603 ++MI; 11604 } 11605 llvm::APSInt VLENVal; 11606 SourceLocation ExprLoc; 11607 const Expr *VLENExpr = Attr->getSimdlen(); 11608 if (VLENExpr) { 11609 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11610 ExprLoc = VLENExpr->getExprLoc(); 11611 } 11612 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11613 if (CGM.getTriple().isX86()) { 11614 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11615 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11616 unsigned VLEN = VLENVal.getExtValue(); 11617 StringRef MangledName = Fn->getName(); 11618 if (CGM.getTarget().hasFeature("sve")) 11619 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11620 MangledName, 's', 128, Fn, ExprLoc); 11621 if (CGM.getTarget().hasFeature("neon")) 11622 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11623 MangledName, 'n', 128, Fn, ExprLoc); 11624 } 11625 } 11626 FD = FD->getPreviousDecl(); 11627 } 11628 } 11629 11630 namespace { 11631 /// Cleanup action for doacross support. 
/// Emits the __kmpc_doacross_fini call with the captured args when the
/// doacross region is exited (including via exceptions).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the runtime's per-dimension record.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One zero-initialized kmp_dim per loop dimension (lower stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini via an EH-safe cleanup.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emits __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink)') with the loop counter values from clause \p C.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  // Materialize the per-loop counter values as a kmp_int64 array.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // source -> post the iteration; sink -> wait for it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee with an artificial debug location, using the
/// nounwind form when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Record that at least one declare-target region has been emitted.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

// Host-side default: the native parameter is used directly; targets may
// override to map between native and target parameter representations.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the variable is a local of an untied task, pick up the addresses
  // recorded for the current function, if any.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Allocate via the runtime: __kmpc_alloc(gtid, size, allocator).
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Emits __kmpc_free(gtid, ptr, allocator) when the scope is exited.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

// Pushes the set of nontemporal decls from the directive's 'nontemporal'
// clauses; popped in the destructor.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
            ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

// Registers the untied-task local variable addresses for the current
// function; popped in the destructor.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

// True if \p VD appears in any nontemporal clause set currently on the stack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

// Collects, into NeedToAddForLPCsAsDisabled, the decls for which the
// lastprivate-conditional analysis must be disabled inside directive \p S:
// captured vars of target/task regions and scalar vars named in privatizing
// clauses that are tracked by an enclosing (non-disabled) entry on the stack.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same treatment for firstprivate, lastprivate, reduction and (below)
  // linear clauses.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C :
S.getClausesOfKind<OMPLinearClause>()) { 12019 for (const Expr *Ref : C->varlists()) { 12020 if (!Ref->getType()->isScalarType()) 12021 continue; 12022 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12023 if (!DRE) 12024 continue; 12025 NeedToCheckForLPCs.insert(DRE->getDecl()); 12026 } 12027 } 12028 for (const Decl *VD : NeedToCheckForLPCs) { 12029 for (const LastprivateConditionalData &Data : 12030 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12031 if (Data.DeclToUniqueName.count(VD) > 0) { 12032 if (!Data.Disabled) 12033 NeedToAddForLPCsAsDisabled.insert(VD); 12034 break; 12035 } 12036 } 12037 } 12038 } 12039 12040 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12041 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12042 : CGM(CGF.CGM), 12043 Action((CGM.getLangOpts().OpenMP >= 50 && 12044 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12045 [](const OMPLastprivateClause *C) { 12046 return C->getKind() == 12047 OMPC_LASTPRIVATE_conditional; 12048 })) 12049 ? 
ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Register one frame containing every 'lastprivate(conditional:...)' var,
  // each mapped to a unique name later used for its global "last value".
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

// Constructor used only by disable(): pushes a Disabled frame listing decls
// for which the enclosing lastprivate-conditional analysis is suppressed.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // The unique names are irrelevant for a disabled frame, so leave them
    // empty; only the decl keys and the Disabled flag matter.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

// Pops whichever kind of frame (if any) the constructor pushed; the asserts
// check that the stack discipline was not violated in between.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

// Allocates (once per function/VD pair) a record { value; char Fired; } used
// to track whether a lastprivate conditional var was written, resets Fired to
// 0 and returns the address of the value field as the private copy of VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // NOTE(review): record name keeps the historical "lasprivate" spelling;
    // it is an IR-visible name, so renaming it would change emitted IR.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag on (re)entry to the region.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search frames innermost-first; a Disabled frame shadows outer ones.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this->x' or implicit) qualify.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt
             *Child : S->children()) {
      if (!Child)
        continue;
      // Only glvalue sub-expressions can denote a variable being written.
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits (inside a critical section named after the variable) the update of
/// the "last iteration / last value" globals for one lastprivate conditional
/// variable: if the current iteration is >= the recorded one, both globals
/// are overwritten.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Signedness of the IV picks the comparison.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// Called when an assignment's LHS has been seen: if LHS refers to a
/// registered lastprivate conditional variable, emit the tracking update.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
12322 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12323 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12324 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12325 "Lastprivate conditional is not found in outer region."); 12326 QualType StructTy = std::get<0>(It->getSecond()); 12327 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12328 LValue PrivLVal = CGF.EmitLValue(FoundE); 12329 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12330 PrivLVal.getAddress(CGF), 12331 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12332 LValue BaseLVal = 12333 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12334 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12335 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12336 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12337 FiredLVal, llvm::AtomicOrdering::Unordered, 12338 /*IsVolatile=*/true, /*isInit=*/false); 12339 return; 12340 } 12341 12342 // Private address of the lastprivate conditional in the current context. 
12343 // priv_a 12344 LValue LVal = CGF.EmitLValue(FoundE); 12345 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12346 FoundE->getExprLoc()); 12347 } 12348 12349 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12350 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12351 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12352 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12353 return; 12354 auto Range = llvm::reverse(LastprivateConditionalStack); 12355 auto It = llvm::find_if( 12356 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12357 if (It == Range.end() || It->Fn != CGF.CurFn) 12358 return; 12359 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12360 assert(LPCI != LastprivateConditionalToTypes.end() && 12361 "Lastprivates must be registered already."); 12362 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12363 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12364 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12365 for (const auto &Pair : It->DeclToUniqueName) { 12366 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12367 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12368 continue; 12369 auto I = LPCI->getSecond().find(Pair.first); 12370 assert(I != LPCI->getSecond().end() && 12371 "Lastprivate must be rehistered already."); 12372 // bool Cmp = priv_a.Fired != 0; 12373 LValue BaseLVal = std::get<3>(I->getSecond()); 12374 LValue FiredLVal = 12375 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12376 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12377 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12378 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12379 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12380 // if (Cmp) { 12381 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12382 CGF.EmitBlock(ThenBB); 
12383 Address Addr = CGF.GetAddrOfLocalVar(VD); 12384 LValue LVal; 12385 if (VD->getType()->isReferenceType()) 12386 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12387 AlignmentSource::Decl); 12388 else 12389 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12390 AlignmentSource::Decl); 12391 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12392 D.getBeginLoc()); 12393 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12394 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12395 // } 12396 } 12397 } 12398 12399 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12400 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12401 SourceLocation Loc) { 12402 if (CGF.getLangOpts().OpenMP < 50) 12403 return; 12404 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12405 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12406 "Unknown lastprivate conditional variable."); 12407 StringRef UniqueName = It->second; 12408 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12409 // The variable was not updated in the region - exit. 
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: used with -fopenmp-simd. Only simd-related constructs
// are allowed in this mode, so every entry point that would require the host
// OpenMP runtime is a hard llvm_unreachable stub.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Reductions CAN appear on simd constructs, so this one is not a stub: only
// the simple (non-runtime) reduction path is legal and is delegated to the
// base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Returns false: no global is ever emitted for a target region in SIMD-only
// mode (this path is reachable, unlike the stubs above).
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}